In [13]:
import os
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [14]:
# Project directory layout. In the cells shown here, data/raw, data/processed
# and models are the ones actually written to.
folders = ["data/raw", "data/processed", "models", "results", "chatbot", "payments"]

# exist_ok=True keeps this cell safe to re-run: directories that already
# exist are left alone instead of raising FileExistsError.
for folder in folders:
    os.makedirs(folder, exist_ok=True)

print("✅ Project folders created")


✅ Project folders created


In [15]:
# Small hand-written membership dataset, laid out one member per row so each
# record can be read straight across.
columns = [
    "member_id",
    "age",
    "membership_tier",
    "subscription_duration_months",
    "avg_monthly_usage",
    "last_renewal_gap_days",
    "payment_failures",
    "loyalty_score",
    "renewed",
]
rows = [
    (1, 22, "Basic",    6,  4,  12, 1, 60, 0),
    (2, 28, "Premium",  12, 15, 2,  0, 90, 1),
    (3, 35, "Standard", 9,  8,  10, 2, 70, 0),
    (4, 26, "Premium",  12, 18, 1,  0, 95, 1),
    (5, 40, "Basic",    3,  2,  20, 3, 40, 0),
    (6, 31, "Standard", 6,  6,  15, 1, 65, 0),
    (7, 29, "Premium",  12, 20, 1,  0, 98, 1),
    (8, 45, "Basic",    3,  1,  25, 4, 30, 0),
]

# Transpose the rows back into the column-oriented mapping {column: [values]}.
data = {name: list(values) for name, values in zip(columns, zip(*rows))}

df = pd.DataFrame(data)

# Persist the raw table; the preprocessing cell reads it back from this path.
df.to_csv("data/raw/membership_data.csv", index=False)

df

Unnamed: 0,member_id,age,membership_tier,subscription_duration_months,avg_monthly_usage,last_renewal_gap_days,payment_failures,loyalty_score,renewed
0,1,22,Basic,6,4,12,1,60,0
1,2,28,Premium,12,15,2,0,90,1
2,3,35,Standard,9,8,10,2,70,0
3,4,26,Premium,12,18,1,0,95,1
4,5,40,Basic,3,2,20,3,40,0
5,6,31,Standard,6,6,15,1,65,0
6,7,29,Premium,12,20,1,0,98,1
7,8,45,Basic,3,1,25,4,30,0


In [16]:
# Column names, non-null counts and dtypes. info() prints directly to stdout.
df.info()

# Class balance of the target. Printed explicitly because a notebook cell only
# auto-displays its final expression.
print(df["renewed"].value_counts())

# Summary statistics of the numeric columns. Kept as the last expression so the
# result is actually rendered instead of being computed and thrown away.
df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 9 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   member_id                     8 non-null      int64 
 1   age                           8 non-null      int64 
 2   membership_tier               8 non-null      object
 3   subscription_duration_months  8 non-null      int64 
 4   avg_monthly_usage             8 non-null      int64 
 5   last_renewal_gap_days         8 non-null      int64 
 6   payment_failures              8 non-null      int64 
 7   loyalty_score                 8 non-null      int64 
 8   renewed                       8 non-null      int64 
dtypes: int64(8), object(1)
memory usage: 708.0+ bytes


renewed
0    5
1    3
Name: count, dtype: int64

In [17]:
# Re-read the raw table from disk so this cell does not depend on the in-memory
# frame built earlier.
df = pd.read_csv("data/raw/membership_data.csv")

# Encode membership tier as integer codes.
# NOTE: LabelEncoder numbers classes in sorted label order, so the codes do not
# express any ranking between tiers.
encoder = LabelEncoder()
df["membership_tier"] = encoder.fit_transform(df["membership_tier"])

# Features & target (member_id is an identifier, not a predictor)
X = df.drop(["member_id", "renewed"], axis=1)
y = df["renewed"]

# Scale features
# NOTE(review): the scaler is fit on every row before the train/test split done
# in the training cell, so test rows influence the scaling statistics.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Save processed data (standardized features plus the untouched target)
processed_df = pd.DataFrame(X_scaled, columns=X.columns)
processed_df["renewed"] = y.values
processed_df.to_csv("data/processed/cleaned_data.csv", index=False)

# Persist the fitted transformers next to the model. The classifier is trained
# on encoded + standardized values, so raw member data can only be scored later
# if exactly these fitted objects are available to transform it.
joblib.dump(encoder, "models/tier_encoder.pkl")
joblib.dump(scaler, "models/feature_scaler.pkl")

print("✅ Data preprocessed and saved")


✅ Data preprocessed and saved


In [18]:
# Load the standardized feature table written by the preprocessing cell.
df = pd.read_csv("data/processed/cleaned_data.csv")

X = df.drop("renewed", axis=1)
y = df["renewed"]

# stratify=y keeps the class proportions in both partitions. Without it, a
# random draw on such a small, imbalanced target can leave the test set with a
# single class, which makes the report below meaningless.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# Fixed random_state so the fitted forest is reproducible across runs.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# NOTE: the dataset built earlier in this notebook has 8 rows, so the held-out
# set is 2 rows; treat these scores as a smoke test, not a performance estimate.
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

joblib.dump(model, "models/renewal_model.pkl")
print("✅ Model saved")


Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

✅ Model saved


In [19]:
# Load model
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code. Only load model files written by this notebook or a trusted source.
model = joblib.load("models/renewal_model.pkl")

# Feature names, in the column order the model was fitted with. Prefer the
# names stored on the fitted estimator so this cell works from the saved model
# alone; fall back to the training frame's columns when the estimator does not
# expose them (`or` short-circuits, so X is only touched in that case).
feature_names = list(getattr(model, "feature_names_in_", [])) or list(X.columns)

def predict_renewal_probability(user_features):
    """Return the model's predicted probability for the second class (index 1).

    With the 0/1 ``renewed`` labels used in this notebook, that is the
    probability of renewal.

    Parameters
    ----------
    user_features : sequence
        One member's feature values, ordered like ``feature_names``. The model
        was fitted on the standardized table, so the values must already be
        encoded and scaled the same way.

    Returns
    -------
    float
        Probability taken from ``model.predict_proba`` for the single row.
    """
    # Wrap the values in a one-row frame so the columns carry the feature names.
    single_row = pd.DataFrame([user_features], columns=feature_names)
    class_probabilities = model.predict_proba(single_row)
    # Row 0 is the only row; column 1 is the second class.
    return class_probabilities[0][1]

In [20]:
def generate_personalized_offer(renewal_prob, loyalty_score,
                                winback_threshold=0.4, loyalty_threshold=85):
    """Pick the renewal offer text for a member.

    Rules are checked in order, so a low renewal probability takes precedence
    over a high loyalty score.

    Parameters
    ----------
    renewal_prob : float
        Predicted probability that the member renews.
    loyalty_score : float
        Loyalty score on the same scale as ``loyalty_threshold`` (the raw,
        unscaled score when the default threshold is used).
    winback_threshold : float, default 0.4
        Members with ``renewal_prob`` strictly below this get the win-back offer.
    loyalty_threshold : float, default 85
        Members with ``loyalty_score`` strictly above this get the loyalty reward.

    Returns
    -------
    str
        The offer description.
    """
    if renewal_prob < winback_threshold:
        return "20% discount + 1 free month (Win-back Offer)"
    if loyalty_score > loyalty_threshold:
        return "Free upgrade for 1 month (Loyalty Reward)"
    return "Standard renewal reminder"

In [21]:
def chatbot(intent, user_features, loyalty_score):
    """Return the chatbot's reply for a given intent.

    Parameters
    ----------
    intent : str
        One of "renew", "benefits" or "payment"; anything else gets the
        fallback reply.
    user_features : sequence
        Feature values passed to ``predict_renewal_probability``. Only used
        for the "renew" intent.
    loyalty_score : float
        Loyalty score passed to ``generate_personalized_offer``. Only used for
        the "renew" intent.

    Returns
    -------
    str
        The reply text.
    """
    if intent == "renew":
        # Score the member only when the reply needs it, so the other intents
        # neither pay for model inference nor fail on unusable feature values.
        prob = predict_renewal_probability(user_features)
        offer = generate_personalized_offer(prob, loyalty_score)
        return f"Your renewal probability is {prob:.2f}. Offer: {offer}"
    if intent == "benefits":
        return "Your plan includes curated benefits shown via ScaleDown approach."
    if intent == "payment":
        return "Redirecting to payment gateway..."
    return "Sorry, I didn’t understand your request."

In [22]:
def process_payment(amount):
    """Return a status message for a payment of ``amount``.

    An amount strictly greater than zero is reported as successful; zero and
    negative amounts are reported as failed. The function only builds the
    message; it does not contact any payment service.
    """
    return "Payment Successful ✅" if amount > 0 else "Payment Failed ❌"

In [23]:
# Sample user: first row of the standardized feature table the model was
# trained on.
sample_user = X.iloc[0].values

# The offer rule compares loyalty against a raw-scale threshold (85), but `df`
# holds standardized values at this point, so its loyalty_score is a z-score
# that can never exceed that threshold. Read the raw score instead; the
# processed file was written in the same row order as the raw file, so row 0
# is the same member.
raw_members = pd.read_csv("data/raw/membership_data.csv")
loyalty = raw_members["loyalty_score"].iloc[0]

print(chatbot("renew", sample_user, loyalty))
print(process_payment(499))

Your renewal probability is 0.10. Offer: 20% discount + 1 free month (Win-back Offer)
Payment Successful ✅


In [24]:
# Headline metrics for the summary table.
# NOTE: these figures are hard-coded; nothing in this notebook computes them.
results = {
    "Renewal Rate Increase (%)": 30,
    "Lapsed Member Recovery (%)": 40,
    "Automation Level (%)": 85,
    "Documentation Reduction (%)": 75,
}

# Two-column table: metric name alongside its value, in insertion order.
results_df = pd.DataFrame(
    {
        "Metric": list(results),
        "Value": list(results.values()),
    }
)
results_df

Unnamed: 0,Metric,Value
0,Renewal Rate Increase (%),30
1,Lapsed Member Recovery (%),40
2,Automation Level (%),85
3,Documentation Reduction (%),75
