In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
import joblib

# Create example data (replace this with your real dataset loading if you have one)
data_dict = {
    "Customer_Age": np.random.randint(21, 70, 500),
    "Gender": np.random.choice(["M", "F"], 500),
    "Income_Category": np.random.choice(["Low", "Medium", "High"], 500),
    "Credit_Limit": np.random.uniform(1000, 15000, 500),
    "Avg_Utilization_Ratio": np.random.uniform(0, 1, 500),
    "Total_Trans_Ct": np.random.randint(1, 120, 500),
    "Months_on_book": np.random.randint(1, 60, 500),
    "Churn": np.random.choice([0, 1], 500, p=[0.8, 0.2])  # 20% churn rate
}

data = pd.DataFrame(data_dict)

# Encode categorical columns
categorical_cols = ["Gender", "Income_Category"]
le = LabelEncoder()
for col in categorical_cols:
    data[col] = le.fit_transform(data[col])

# Prepare features and target
X = data.drop("Churn", axis=1)
y = data["Churn"]

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train RandomForest with balanced class weights
model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, zero_division=0))
print("ROC AUC:", roc_auc_score(y_test, model.predict_proba(X_test)[:, 1]))

# Save the model and scaler (overwrites old files)
joblib.dump(model, "model.pkl")
joblib.dump(scaler, "scaler.pkl")

print("Retraining complete. Model and scaler saved as 'model.pkl' and 'scaler.pkl'.")

              precision    recall  f1-score   support

           0       0.81      1.00      0.90        81
           1       0.00      0.00      0.00        19

    accuracy                           0.81       100
   macro avg       0.41      0.50      0.45       100
weighted avg       0.66      0.81      0.72       100

ROC AUC: 0.5207927225471085
Retraining complete. Model and scaler saved as 'model.pkl' and 'scaler.pkl'.
