In [1]:
import os
import time
import warnings
import numpy  as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

from sklearn.ensemble        import RandomForestClassifier
from sklearn.linear_model    import LogisticRegression
from sklearn.pipeline        import Pipeline
from sklearn.compose         import ColumnTransformer
from sklearn.preprocessing   import StandardScaler, OneHotEncoder
from sklearn.model_selection import (
    train_test_split,
    RandomizedSearchCV,
    StratifiedKFold,
    cross_val_score,
)
from sklearn.metrics import (
    roc_auc_score, roc_curve,
    confusion_matrix, classification_report,
    precision_score, recall_score, f1_score,
)

# Suppress all warnings for the session so the step logs stay readable.
# NOTE(review): this is a blanket filter — it also hides library diagnostics
# (e.g. convergence or encoder warnings); consider narrowing it.
warnings.filterwarnings(action="ignore")

# Seed NumPy's global RNG; the estimators below additionally pass
# random_state=42 explicitly.
np.random.seed(42)

# Input file, output folder and the reference date used to compute tenure.
DATA_PATH     = "/content/drive/MyDrive/rfm_features.csv"
OUTPUT_DIR    = "/content/model_outputs"
SNAPSHOT_DATE = pd.Timestamp(year=2024, month=6, day=30)

# Make sure the output folder exists before any step writes into it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [2]:
# Section banner for the notebook log.
step_rule = "=" * 60
print(step_rule)
print("STEP 1 — Loading data")
print(step_rule)

# Read the feature table; the two date columns are parsed while loading.
df = pd.read_csv(
    DATA_PATH,
    parse_dates=["last_order_date", "registration_date"],
)

# Report the row count and the class balance of the `churned` label.
churn_flag    = df["churned"]
retained_flag = df["churned"] == 0
print(f"  Rows loaded       : {len(df):,}")
print(f"  Churned (label=1) : {churn_flag.sum():,}  ({churn_flag.mean()*100:.1f}%)")
print(f"  Retained (label=0): {retained_flag.sum():,}  ({retained_flag.mean()*100:.1f}%)")


STEP 1 — Loading data
  Rows loaded       : 5,000
  Churned (label=1) : 3,184  (63.7%)
  Retained (label=0): 1,816  (36.3%)


In [3]:
print("\n" + "=" * 60)
print("STEP 2 — Feature engineering")
print("=" * 60)

# Every ratio below floors its denominator at 1 (clip(lower=1)) so that zero
# or sub-unit denominators cannot produce division by zero or inflated ratios.

# Tenure: days between registration and the snapshot date.
time_since_registration = SNAPSHOT_DATE - df["registration_date"]
df["tenure_days"] = time_since_registration.dt.days

# Orders per 30-day month of tenure.
tenure_months = (df["tenure_days"] / 30).clip(lower=1)
df["monthly_order_rate"] = df["frequency"] / tenure_months

# Average order value as a share of total monetary value.
total_spend_floor = df["monetary"].clip(lower=1)
df["spend_per_order_ratio"] = df["avg_order_value"] / total_spend_floor

# Distinct categories bought per order placed.
order_count_floor = df["frequency"].clip(lower=1)
df["category_breadth_ratio"] = df["distinct_categories"] / order_count_floor

# Discounts as a share of gross spend (monetary + discounts).
gross_spend_floor = (df["monetary"] + df["total_discounts"]).clip(lower=1)
df["discount_dependency"] = df["total_discounts"] / gross_spend_floor

# Names of the columns added above, echoed to the log.
engineered = [
    "tenure_days",
    "monthly_order_rate",
    "spend_per_order_ratio",
    "category_breadth_ratio",
    "discount_dependency",
]
print("  Engineered features:")
print("\n".join(f"    + {feat}" for feat in engineered))


STEP 2 — Feature engineering
  Engineered features:
    + tenure_days
    + monthly_order_rate
    + spend_per_order_ratio
    + category_breadth_ratio
    + discount_dependency


In [4]:
print("\n" + "=" * 60)
print("STEP 3 — Defining feature set")
print("=" * 60)

# Numeric model inputs grouped by theme; dict order defines column order.
# NOTE(review): recency_days / R_score may overlap with how `churned` was
# labelled — confirm the label definition to rule out target leakage.
_NUMERIC_FEATURE_GROUPS = {
    "core_rfm":        ["recency_days", "frequency", "monetary"],
    "order_behaviour": ["avg_order_value", "avg_delivery_days", "avg_review_score"],
    "refund":          ["refund_rate", "refund_count"],
    "engagement":      ["distinct_categories", "total_discounts"],
    "rfm_scores":      ["R_score", "F_score", "M_score"],
    "engineered":      ["tenure_days", "monthly_order_rate", "spend_per_order_ratio",
                        "category_breadth_ratio", "discount_dependency"],
}

# Flatten the groups into the single ordered list the pipeline consumes.
NUMERIC_FEATURES = [
    name
    for group in _NUMERIC_FEATURE_GROUPS.values()
    for name in group
]

# Columns that are one-hot encoded rather than scaled.
CATEGORICAL_FEATURES = ["state", "age_group"]


STEP 3 — Defining feature set


In [5]:
# Name of the binary label column.
TARGET = "churned"

# Copy the model inputs and the label out of df so later steps work on
# independent objects.
model_columns = NUMERIC_FEATURES + CATEGORICAL_FEATURES
X = df[model_columns].copy()
y = df[TARGET].copy()

# Log the size of the feature set.
n_numeric     = len(NUMERIC_FEATURES)
n_categorical = len(CATEGORICAL_FEATURES)
print(f"  Numeric features    : {n_numeric}")
print(f"  Categorical features: {n_categorical}")
print(f"  Total features      : {n_numeric + n_categorical}")
print(f"  Target              : {TARGET}")


  Numeric features    : 18
  Categorical features: 2
  Total features      : 20
  Target              : churned


In [6]:
print("\n" + "=" * 60)
print("STEP 4 — Train/test split")
print("=" * 60)

# 80/20 hold-out, stratified on the label so both parts keep the same churn
# rate; the fixed seed makes the partition repeatable.
split_options = {"test_size": 0.20, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

train_churn_pct = y_train.mean() * 100
test_churn_pct  = y_test.mean() * 100
print("  Split method : Stratified 80/20")
print(f"  Train set    : {len(X_train):,} customers  (churn rate: {train_churn_pct:.1f}%)")
print(f"  Test  set    : {len(X_test):,}  customers  (churn rate: {test_churn_pct:.1f}%)")



STEP 4 — Train/test split
  Split method : Stratified 80/20
  Train set    : 4,000 customers  (churn rate: 63.7%)
  Test  set    : 1,000  customers  (churn rate: 63.7%)


In [7]:
print("\n" + "=" * 60)
print("STEP 5 — Building preprocessing pipeline")
print("=" * 60)


def make_preprocessor():
    """Build a new, unfitted ColumnTransformer for one model pipeline.

    NUMERIC_FEATURES are standardised and CATEGORICAL_FEATURES are one-hot
    encoded with the first level dropped; any other column is discarded.

    Note: with drop="first" and handle_unknown="ignore", a category not seen
    during fit is encoded as all zeros, i.e. the same as the dropped level.
    """
    numeric_step = ("num", StandardScaler(), NUMERIC_FEATURES)
    categorical_step = (
        "cat",
        OneHotEncoder(drop="first", handle_unknown="ignore"),
        CATEGORICAL_FEATURES,
    )
    return ColumnTransformer(
        transformers=[numeric_step, categorical_step],
        remainder="drop",
    )


for summary_line in (
    "  Numeric      → StandardScaler",
    "  Categorical  → OneHotEncoder (drop='first')",
):
    print(summary_line)


STEP 5 — Building preprocessing pipeline
  Numeric      → StandardScaler
  Categorical  → OneHotEncoder (drop='first')


In [8]:
print("\n" + "=" * 60)
print("STEP 6 — Training baseline models")
print("=" * 60)

# --- 6.1  Untuned Random Forest (hand-picked settings, no search) ---
rf_default_classifier = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    min_samples_leaf=5,
    class_weight="balanced",
    random_state=42,
    n_jobs=-1,
)
rf_default = Pipeline([
    ("preprocessor", make_preprocessor()),
    ("classifier", rf_default_classifier),
])
print("  Training default Random Forest...")
rf_default.fit(X_train, y_train)
# Column 1 of predict_proba is the probability of the positive (churn) class.
rf_default_test_scores = rf_default.predict_proba(X_test)[:, 1]
rf_default_auc = roc_auc_score(y_test, rf_default_test_scores)
print(f"  Done  →  Test AUC: {rf_default_auc:.4f}")

# --- 6.2  Logistic Regression (linear baseline) ---
lr_classifier = LogisticRegression(
    class_weight="balanced",
    max_iter=1000,
    random_state=42,
)
lr_pipeline = Pipeline([
    ("preprocessor", make_preprocessor()),
    ("classifier", lr_classifier),
])
print("  Training Logistic Regression...")
lr_pipeline.fit(X_train, y_train)
lr_test_scores = lr_pipeline.predict_proba(X_test)[:, 1]
lr_auc = roc_auc_score(y_test, lr_test_scores)
print(f"  Done  →  Test AUC: {lr_auc:.4f}")



STEP 6 — Training baseline models
  Training default Random Forest...
  Done  →  Test AUC: 0.9746
  Training Logistic Regression...
  Done  →  Test AUC: 0.9746


In [9]:
print("\n" + "=" * 60)
print("STEP 7 — Hyperparameter tuning (RandomizedSearchCV)")
print("=" * 60)

# --- 7.1  Define the search space ---
# All keys are prefixed "classifier__" because they sit inside a Pipeline.
param_distributions = {

    # Number of trees — more = more stable predictions, slower to train
    "classifier__n_estimators":       [100, 200, 300, 500],

    # Maximum tree depth — primary regularisation lever
    # None = trees grow until pure leaves (risk of overfitting)
    "classifier__max_depth":          [5, 8, 10, 15, 20, None],

    # Minimum samples required to attempt a node split
    "classifier__min_samples_split":  [2, 5, 10, 20],

    # Minimum samples required at a leaf — smooths decision boundary
    "classifier__min_samples_leaf":   [1, 2, 5, 10],

    # Features considered at each split — controls tree diversity
    # "sqrt" is the RF default; "log2" is more aggressive; floats = fraction
    "classifier__max_features":       ["sqrt", "log2", 0.5, 0.8],

    # Bootstrap sampling — False uses full training set for each tree
    "classifier__bootstrap":          [True, False],

    # Class weighting — critical for the 63.7% churn imbalance
    # "balanced_subsample" reweights within each bootstrap sample
    "classifier__class_weight":       ["balanced", "balanced_subsample"],
}

# Size of the full grid = product of the number of candidates per parameter.
# Derived from the dict itself so the figure cannot drift out of sync when a
# parameter is added to or removed from the search space.
total_combinations = 1
for candidate_values in param_distributions.values():
    total_combinations *= len(candidate_values)

# Single source of truth for the search budget, so the log lines below always
# match what RandomizedSearchCV / StratifiedKFold actually receive.
N_ITER   = 50
N_SPLITS = 5

# --- 7.2  Cross-validation strategy ---
# StratifiedKFold preserves the churn ratio in every fold — essential for
# imbalanced datasets. 5 folds gives stable estimates without excessive runtime.
cv_strategy = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

# --- 7.3  Build the tuning pipeline ---
# NOTE(review): the forest and the search both request n_jobs=-1; if CPU
# oversubscription is observed, limit one of the two.
tuning_pipeline = Pipeline([
    ("preprocessor", make_preprocessor()),
    ("classifier",   RandomForestClassifier(random_state=42, n_jobs=-1)),
])

# --- 7.4  Run RandomizedSearchCV ---
# N_ITER random combinations are sampled from the full space — enough to find
# high-performing regions without an exhaustive grid search.
# refit=True means the best estimator is automatically refit on the full
# training set and is ready to predict straight away.
print(f"  Search space:")
print(f"    Parameters          : {len(param_distributions)}")
print(f"    Total combinations  : {total_combinations:,}")
print(f"    Random trials       : {N_ITER}")
print(f"    CV folds            : {N_SPLITS} (StratifiedKFold)")
print(f"    Scoring metric      : AUC-ROC")
# The recorded run needed ~15 minutes, so no "1–3 minutes" promise here.
print(f"  Running search — this can take several minutes...")

t0 = time.time()
random_search = RandomizedSearchCV(
    estimator           = tuning_pipeline,
    param_distributions = param_distributions,
    n_iter              = N_ITER,
    scoring             = "roc_auc",
    cv                  = cv_strategy,
    refit               = True,
    return_train_score  = True,   # train scores are exported with the trials
    random_state        = 42,
    n_jobs              = -1,
    verbose             = 0,
)
random_search.fit(X_train, y_train)
tuning_time = time.time() - t0   # wall-clock seconds spent in the search

# --- 7.5  Extract results ---
# Winning configuration, its mean cross-validated AUC, and the pipeline that
# the search refit on the whole training set.
best_params = random_search.best_params_
best_cv_auc = random_search.best_score_
rf_tuned    = random_search.best_estimator_

# Held-out AUC of the refit pipeline (column 1 = churn probability).
rf_tuned_test_scores = rf_tuned.predict_proba(X_test)[:, 1]
rf_tuned_auc = roc_auc_score(y_test, rf_tuned_test_scores)

print(f"  Completed in {tuning_time:.0f}s")
print(f"\n  Best CV AUC (5-fold) : {best_cv_auc:.4f}")
print(f"  Tuned test AUC       : {rf_tuned_auc:.4f}")
print(f"  Default test AUC     : {rf_default_auc:.4f}")

# Test-AUC gain over the untuned forest, shown in percentage points.
delta = rf_tuned_auc - rf_default_auc
delta_sign = "+" if delta >= 0 else ""
print(f"  Improvement          : {delta_sign}{delta*100:.3f} pp")

print(f"\n  Best hyperparameters:")
for param in sorted(best_params):
    value = best_params[param]
    short_name = param.replace("classifier__", "")
    print(f"    {short_name:<25} : {value}")

# --- 7.6  Save all 50 trial results ---
def _strip_param_prefix(column_name):
    """Drop the 'param_classifier__' prefix from a cv_results_ column name."""
    return column_name.replace("param_classifier__", "")


results_df = pd.DataFrame(random_search.cv_results_)

# Keep the sampled parameter values plus the summary score columns.
param_cols = [c for c in results_df.columns if c.startswith("param_")]
score_cols = ["mean_test_score", "std_test_score", "mean_train_score", "rank_test_score"]
keep_cols  = param_cols + score_cols

# Shorten the column names and order trials from best (rank 1) to worst.
results_df = results_df[keep_cols]
results_df = results_df.rename(columns=_strip_param_prefix)
results_df = results_df.sort_values("rank_test_score")
results_df = results_df.reset_index(drop=True)

results_path = os.path.join(OUTPUT_DIR, "tuning_results.csv")
results_df.to_csv(results_path, index=False)
print(f"\n  All 50 trial results saved: {results_path}")

# --- 7.7  Cross-validate tuned model to confirm generalisation ---
# NOTE(review): this re-scores the selected configuration on the same
# training data and the same cv_strategy folds the search used, so it is
# expected to reproduce random_search.best_score_ — it measures fold-to-fold
# stability rather than providing an independent overfitting check.
print(f"\n  5-fold CV on tuned model (overfitting check):")
cv_scores = cross_val_score(
    rf_tuned, X_train, y_train,
    cv=cv_strategy, scoring="roc_auc", n_jobs=-1
)

# Convert to built-in floats before rounding: under NumPy 2 the repr of a
# NumPy scalar is "np.float64(0.982)", which would clutter the printed list.
fold_aucs = [round(float(score), 4) for score in cv_scores]
print(f"    Fold AUCs : {fold_aucs}")
print(f"    Mean AUC  : {cv_scores.mean():.4f}")
# A spread below 0.02 AUC across folds is reported as stable.
print(f"    Std AUC   : {cv_scores.std():.4f}  "
      f"({'stable' if cv_scores.std() < 0.02 else 'high variance — review'})")



STEP 7 — Hyperparameter tuning (RandomizedSearchCV)
  Search space:
    Parameters          : 7
    Total combinations  : 6,144
    Random trials       : 50
    CV folds            : 5 (StratifiedKFold)
    Scoring metric      : AUC-ROC
  Running search — this takes 1–3 minutes...
  Completed in 896s

  Best CV AUC (5-fold) : 0.9818
  Tuned test AUC       : 0.9753
  Default test AUC     : 0.9746
  Improvement          : +0.070 pp

  Best hyperparameters:
    bootstrap                 : True
    class_weight              : balanced_subsample
    max_depth                 : None
    max_features              : 0.5
    min_samples_leaf          : 10
    min_samples_split         : 20
    n_estimators              : 200

  All 50 trial results saved: /content/model_outputs/tuning_results.csv

  5-fold CV on tuned model (overfitting check):
    Fold AUCs : [np.float64(0.982), np.float64(0.9833), np.float64(0.975), np.float64(0.9841), np.float64(0.9845)]
    Mean AUC  : 0.9818
    Std AUC  

In [10]:
print("\n" + "=" * 60)
print("STEP 8 — Model evaluation")
print("=" * 60)


def _score_on_test(model):
    """Return (churn probability, predicted label) of `model` on X_test."""
    churn_proba = model.predict_proba(X_test)[:, 1]
    hard_label  = model.predict(X_test)
    return churn_proba, hard_label


# Test-set scores and labels for the three fitted pipelines.
rf_tuned_proba,   rf_tuned_pred   = _score_on_test(rf_tuned)
rf_default_proba, rf_default_pred = _score_on_test(rf_default)
lr_proba,         lr_pred         = _score_on_test(lr_pipeline)

def compute_metrics(y_true, y_pred, y_proba):
    """Summarise classifier quality on one labelled sample.

    Parameters
    ----------
    y_true  : true labels; must support positional `.iloc` indexing.
    y_pred  : predicted labels.
    y_proba : predicted positive-class scores, aligned with `y_true`.

    Returns
    -------
    dict with keys "auc", "precision", "recall", "f1" and "prec_d10", the
    mean of `y_true` among the highest-scored 10% of rows (at least one row).
    """
    # Size of the top decile, never smaller than a single row.
    n_decile = max(1, int(len(y_true) * 0.10))

    # Positions ordered from highest to lowest score, cut to the top decile.
    ranked_positions = np.argsort(y_proba)[::-1]
    top_idx = ranked_positions[:n_decile]

    metrics = {"auc": roc_auc_score(y_true, y_proba)}
    metrics["precision"] = precision_score(y_true, y_pred)
    metrics["recall"]    = recall_score(y_true, y_pred)
    metrics["f1"]        = f1_score(y_true, y_pred)
    metrics["prec_d10"]  = y_true.iloc[top_idx].mean()
    return metrics

# One metrics dict per model, all measured on the held-out test set.
m_tuned, m_default, m_lr = (
    compute_metrics(y_test, pred, proba)
    for pred, proba in (
        (rf_tuned_pred,   rf_tuned_proba),
        (rf_default_pred, rf_default_proba),
        (lr_pred,         lr_proba),
    )
)

# (display label, metrics-dict key) pairs in table order.
metric_rows = [
    ("AUC-ROC",              "auc"),
    ("Precision",            "precision"),
    ("Recall",               "recall"),
    ("F1 Score",             "f1"),
    ("Precision @ Decile 1", "prec_d10"),
]

# Comparison table: one row per metric, one column per model.
print(f"\n  {'Metric':<28} {'RF Tuned':>12} {'RF Default':>12} {'Log Reg':>12}")
print(f"  {'-'*66}")
for label, key in metric_rows:
    value_cells = " ".join(
        f"{model_metrics[key]:>12.4f}"
        for model_metrics in (m_tuned, m_default, m_lr)
    )
    print(f"  {label:<28} {value_cells}")

# Pass/fail line against the 0.75 AUC target.
tuned_auc_on_test = m_tuned["auc"]
target_message = (
    f"\n  ✓ Tuned AUC {tuned_auc_on_test:.4f} meets target threshold of 0.75"
    if tuned_auc_on_test >= 0.75
    else f"\n  ✗ Tuned AUC {tuned_auc_on_test:.4f} below target — review features"
)
print(target_message)


STEP 8 — Model evaluation

  Metric                           RF Tuned   RF Default      Log Reg
  ------------------------------------------------------------------
  AUC-ROC                            0.9753       0.9746       0.9746
  Precision                          0.9377       0.9412       0.9487
  Recall                             0.9451       0.9294       0.8995
  F1 Score                           0.9414       0.9352       0.9234
  Precision @ Decile 1               1.0000       1.0000       1.0000

  ✓ Tuned AUC 0.9753 meets target threshold of 0.75


In [11]:
print("\n" + "=" * 60)
print("STEP 9 — Feature importance (tuned model)")
print("=" * 60)

# Fitted forest and fitted preprocessor of the tuned pipeline.
tuned_clf           = rf_tuned.named_steps["classifier"]
fitted_preprocessor = rf_tuned.named_steps["preprocessor"]

# Column names after preprocessing: numeric block first, then the one-hot
# columns, matching the transformer order inside the ColumnTransformer.
fitted_encoder = fitted_preprocessor.named_transformers_["cat"]
ohe_feats      = fitted_encoder.get_feature_names_out(CATEGORICAL_FEATURES)
all_features   = NUMERIC_FEATURES + list(ohe_feats)

# Importances paired with names, largest first, index renumbered from 0.
importance_df = pd.DataFrame({
    "feature":    all_features,
    "importance": tuned_clf.feature_importances_,
})
importance_df = importance_df.sort_values("importance", ascending=False)
importance_df = importance_df.reset_index(drop=True)

# Text bar chart of the ten largest importances (200 bar cells = 1.0).
print("\n  Top 10 most important features:")
top_ten = importance_df.head(10)
for feature_name, importance in zip(top_ten["feature"], top_ten["importance"]):
    bar = "█" * int(importance * 200)
    print(f"    {feature_name:<30} {importance:.4f}  {bar}")


STEP 9 — Feature importance (tuned model)

  Top 10 most important features:
    recency_days                   0.5162  ███████████████████████████████████████████████████████████████████████████████████████████████████████
    R_score                        0.2514  ██████████████████████████████████████████████████
    spend_per_order_ratio          0.0798  ███████████████
    frequency                      0.0342  ██████
    monthly_order_rate             0.0325  ██████
    tenure_days                    0.0191  ███
    F_score                        0.0147  ██
    avg_order_value                0.0075  █
    monetary                       0.0070  █
    total_discounts                0.0063  █


In [12]:
print("\n" + "=" * 60)
print("STEP 10 — Generating evaluation charts")
print("=" * 60)

# Colour palette shared by all panels.
NAVY, AMBER, GREEN = "#1B2A4A", "#F5A623", "#10B981"
RED, TEAL, GRAY    = "#EF4444", "#0EA5E9", "#6B7280"

# White 20x14 inch canvas with a 2-row x 3-column panel layout.
fig = plt.figure(figsize=(20, 14), facecolor="white")
gs  = fig.add_gridspec(2, 3, hspace=0.45, wspace=0.35)

# ── Chart 1: ROC Curves — all three models ───────────────────────────────────
ax1 = fig.add_subplot(gs[0, 0])

# (scores, display name, metrics dict, colour, line style) per model.
roc_series = [
    (rf_tuned_proba,   "RF Tuned",   m_tuned,   NAVY,  "-"),
    (rf_default_proba, "RF Default", m_default, AMBER, "--"),
    (lr_proba,         "Log Reg",    m_lr,      TEAL,  ":"),
]
for proba, model_name, model_metrics, colour, ls in roc_series:
    fpr, tpr, _ = roc_curve(y_test, proba)
    # Names are padded to 10 characters so the legend entries line up.
    label = f"{model_name:<10} (AUC={model_metrics['auc']:.3f})"
    ax1.plot(fpr, tpr, color=colour, lw=2, linestyle=ls, label=label)

# Diagonal reference line.
ax1.plot([0, 1], [0, 1], color=GRAY, lw=1, linestyle=":")
# NOTE(review): this horizontal line marks TPR = 0.75, whereas the 0.75
# target elsewhere in the notebook is an AUC value — confirm it is intended.
ax1.axhline(0.75, color=GREEN, lw=1, linestyle="--", alpha=0.5, label="Target (0.75)")

ax1.set_title("ROC Curves — All Models",
              fontsize=12, fontweight="bold", color=NAVY, pad=10)
ax1.set_xlabel("False Positive Rate", fontsize=10, color=GRAY)
ax1.set_ylabel("True Positive Rate",  fontsize=10, color=GRAY)
ax1.legend(fontsize=8, framealpha=0.9)
ax1.set_facecolor("#FAFBFD")
ax1.grid(True, alpha=0.3, color=GRAY)

# ── Chart 2: Confusion Matrix — tuned model ──────────────────────────────────
ax2 = fig.add_subplot(gs[0, 1])

# Rows are actual classes, columns are predicted classes (0 first, then 1).
cm = confusion_matrix(y_test, rf_tuned_pred)
heatmap_options = dict(
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=["Pred: Retained", "Pred: Churned"],
    yticklabels=["Act:  Retained", "Act:  Churned"],
    linewidths=0.5,
    linecolor="white",
    annot_kws={"size": 13, "weight": "bold"},
)
sns.heatmap(cm, ax=ax2, **heatmap_options)

ax2.set_title("Confusion Matrix — RF Tuned",
              fontsize=12, fontweight="bold", color=NAVY, pad=10)
ax2.tick_params(colors=GRAY, labelsize=9)

# ── Chart 3: Metric Comparison Bar ───────────────────────────────────────────
ax3 = fig.add_subplot(gs[0, 2])
metric_labels_short = ["AUC-ROC", "Precision", "Recall", "F1"]
metric_keys_short   = ["auc", "precision", "recall", "f1"]
x     = np.arange(len(metric_labels_short))   # one group per metric
width = 0.25                                  # width of a single bar

bar_series = [
    (m_tuned,   "RF Tuned",   NAVY),
    (m_default, "RF Default", AMBER),
    (m_lr,      "Log Reg",    TEAL),
]
# Offsets -1, 0, +1 centre the three bars of each group on its tick.
for offset, (m_dict, label, colour) in enumerate(bar_series, start=-1):
    vals = [m_dict[k] for k in metric_keys_short]
    bars = ax3.bar(x + offset * width, vals, width,
                   label=label, color=colour, alpha=0.9, edgecolor="white")
    # Print each bar's value just above its top edge.
    for bar in bars:
        bar_centre = bar.get_x() + bar.get_width() / 2
        bar_top    = bar.get_height()
        ax3.text(bar_centre, bar_top + 0.015, f"{bar_top:.2f}",
                 ha="center", va="bottom", fontsize=7,
                 color=colour, fontweight="bold")

ax3.axhline(0.75, color=GREEN, lw=1.5, linestyle="--",
            label="Target (0.75)", alpha=0.8)
ax3.set_title("Model Comparison",
              fontsize=12, fontweight="bold", color=NAVY, pad=10)
ax3.set_xticks(x)
ax3.set_xticklabels(metric_labels_short, fontsize=9, color=GRAY)
ax3.set_ylim(0, 1.15)   # headroom for the value labels
ax3.set_facecolor("#FAFBFD")
ax3.legend(fontsize=8)
ax3.grid(True, axis="y", alpha=0.3, color=GRAY)
ax3.tick_params(colors=GRAY)

# ── Chart 4: Top 15 Feature Importances ──────────────────────────────────────
ax4 = fig.add_subplot(gs[1, :2])

# Sorted ascending so the most important feature is drawn at the top of the
# horizontal bar chart.
top15 = importance_df.head(15).sort_values("importance")

# Highlight the three most important features (the last rows after the
# ascending sort). The cut-off is derived from the actual row count so it
# remains correct when fewer than 15 features are available.
n_highlight     = 3
highlight_start = len(top15) - n_highlight
colours = [AMBER if i >= highlight_start else NAVY for i in range(len(top15))]

bars = ax4.barh(top15["feature"], top15["importance"],
                color=colours, alpha=0.9, edgecolor="white")
ax4.set_title("Top 15 Feature Importances — RF Tuned",
              fontsize=12, fontweight="bold", color=NAVY, pad=10)
ax4.set_xlabel("Importance", fontsize=10, color=GRAY)
ax4.set_facecolor("#FAFBFD")
ax4.grid(True, axis="x", alpha=0.3, color=GRAY)
ax4.tick_params(colors=GRAY, labelsize=9)

# Write each bar's importance just to the right of its end.
for bar in bars:
    ax4.text(bar.get_width() + 0.001,
             bar.get_y() + bar.get_height() / 2,
             f"{bar.get_width():.4f}",
             va="center", fontsize=8, color=GRAY)

# ── Chart 5: Tuning — CV AUC distribution across 50 trials ──────────────────
ax5 = fig.add_subplot(gs[1, 2])

# Histogram of the mean cross-validated AUC of every sampled configuration.
trial_aucs = results_df["mean_test_score"].dropna().values
ax5.hist(trial_aucs, bins=20, color=NAVY, alpha=0.8, edgecolor="white")

# The two reference lines are held-out TEST AUCs, not CV AUCs, so they are
# labelled as such to avoid reading them against the histogram's CV metric.
ax5.axvline(rf_default_auc, color=AMBER, lw=2, linestyle="--",
            label=f"Default test AUC ({rf_default_auc:.4f})")
ax5.axvline(rf_tuned_auc,   color=GREEN, lw=2, linestyle="-",
            label=f"Tuned test AUC   ({rf_tuned_auc:.4f})")

ax5.set_title("Tuning — CV AUC Distribution\n(50 Random Trials)",
              fontsize=12, fontweight="bold", color=NAVY, pad=10)
ax5.set_xlabel("Mean CV AUC (5-fold)", fontsize=10, color=GRAY)
ax5.set_ylabel("Trial Count",          fontsize=10, color=GRAY)
ax5.legend(fontsize=9)
ax5.set_facecolor("#FAFBFD")
ax5.grid(True, alpha=0.3, color=GRAY)
ax5.tick_params(colors=GRAY)

# ── Suptitle & footer ─────────────────────────────────────────────────────────
fig.suptitle(
    "E-Commerce Customer Churn Prediction — Model Evaluation (v2.0)",
    fontsize=16, fontweight="bold", color=NAVY, y=1.01,
)

# Footer: author/date followed by the key tuned hyperparameters; "?" is shown
# for any parameter missing from best_params.
footer_params = [
    ("n_est",     "classifier__n_estimators"),
    ("max_depth", "classifier__max_depth"),
    ("min_leaf",  "classifier__min_samples_leaf"),
    ("max_feat",  "classifier__max_features"),
]
footer_param_text = "  ".join(
    f"{short_name}={best_params.get(param_key, '?')}"
    for short_name, param_key in footer_params
)
footer_text = "Josiah Nwosu  |  February 2026  |  RF Tuned: " + footer_param_text
fig.text(0.5, -0.01, footer_text, ha="center", fontsize=8, color=GRAY)

# Write the figure to disk, then close it.
chart_path = os.path.join(OUTPUT_DIR, "model_evaluation.png")
fig.savefig(chart_path, dpi=150, bbox_inches="tight", facecolor="white")
plt.close()
print(f"  Saved: {chart_path}")


STEP 10 — Generating evaluation charts
  Saved: /content/model_outputs/model_evaluation.png


In [13]:
print("\n" + "=" * 60)
print("STEP 11 — Exporting churn predictions (tuned model)")
print("=" * 60)

# Score every row of X with the tuned pipeline.
# NOTE(review): X also contains the rows the model was fitted on, so scores
# for those customers are in-sample.
all_proba = rf_tuned.predict_proba(X)[:, 1]
all_pred  = rf_tuned.predict(X)

# Identifier, segment and headline RFM columns carried into the export.
export_columns = [
    "customer_id", "segment", "state", "age_group",
    "churned", "recency_days", "frequency", "monetary",
    "avg_order_value", "avg_review_score", "refund_rate",
    "R_score", "F_score", "M_score", "RFM_score",
]

# Spend per order (frequency floored at 1), multiplied by three below.
spend_per_order = df["monetary"] / df["frequency"].clip(lower=1)

predictions = df[export_columns].copy().assign(
    churn_probability=np.round(all_proba, 4),
    churn_predicted=all_pred,
    revenue_at_risk_3m_ngn=np.round(spend_per_order * 3, 0),
)

def health_status(row):
    """Map one customer record to a health label.

    `row` is indexed by "churned" and "churn_probability". Customers already
    flagged as churned are labelled "Churned"; everyone else is bucketed by
    score: >= 0.75 "High Risk", >= 0.50 "Medium Risk", >= 0.25 "Low Risk",
    otherwise "Active".
    """
    if row["churned"] == 1:
        return "Churned"

    probability = row["churn_probability"]
    # Checked from the highest floor down; the first floor reached wins.
    risk_bands = (
        (0.75, "High Risk"),
        (0.50, "Medium Risk"),
        (0.25, "Low Risk"),
    )
    for floor, status in risk_bands:
        if probability >= floor:
            return status
    return "Active"

# Label every customer, then list the largest revenue exposure first.
predictions["health_status"] = predictions.apply(health_status, axis=1)
predictions = predictions.sort_values(by="revenue_at_risk_3m_ngn", ascending=False)

# Write the export without the DataFrame index.
pred_path = os.path.join(OUTPUT_DIR, "churn_predictions.csv")
predictions.to_csv(pred_path, index=False)

print(f"  Saved : {pred_path}")
print(f"  Rows  : {len(predictions):,}")
print("\n  Health status distribution:")
status_counts = predictions["health_status"].value_counts()
print(status_counts.to_string())


STEP 11 — Exporting churn predictions (tuned model)
  Saved : /content/model_outputs/churn_predictions.csv
  Rows  : 5,000

  Health status distribution:
health_status
Churned        3184
Active         1544
Low Risk        137
Medium Risk      95
High Risk        40


In [14]:
print("\n" + "=" * 60)
print("STEP 12 — Saving classification report")
print("=" * 60)

# Per-class precision / recall / F1 of the tuned model on the test set.
report      = classification_report(
    y_test, rf_tuned_pred,
    target_names=["Retained (0)", "Churned (1)"]
)
report_path = os.path.join(OUTPUT_DIR, "classification_report.txt")

# The header contains a non-ASCII dash, so the encoding is set explicitly
# instead of relying on the platform's default text encoding.
with open(report_path, "w", encoding="utf-8") as f:
    f.write("E-COMMERCE CUSTOMER CHURN PREDICTION\n")
    f.write("Classification Report — RF Tuned (v2.0)\n")
    f.write("Josiah Nwosu | February 2026\n")
    f.write("=" * 55 + "\n\n")
    f.write(report)
print(f"  Saved: {report_path}")



STEP 12 — Saving classification report
  Saved: /content/model_outputs/classification_report.txt


In [15]:
print("\n" + "=" * 60)
print("STEP 13 — Saving model summary")
print("=" * 60)

# Figures reported in the summary are read from the objects that produced
# them, so the text cannot drift from the actual run configuration.
n_trials = random_search.n_iter
n_folds  = cv_strategy.get_n_splits()

summary_path = os.path.join(OUTPUT_DIR, "model_summary.txt")
# The text contains non-ASCII dashes, so the encoding is set explicitly
# instead of relying on the platform's default text encoding.
with open(summary_path, "w", encoding="utf-8") as f:
    f.write("=" * 60 + "\n")
    f.write("E-COMMERCE CUSTOMER CHURN PREDICTION\n")
    f.write("Model Summary — v2.0 (with hyperparameter tuning)\n")
    f.write("Author : Josiah Nwosu\n")
    f.write("Date   : February 2026\n")
    f.write("=" * 60 + "\n\n")

    # Dataset size, class balance and split sizes.
    f.write("DATASET\n")
    f.write("-" * 40 + "\n")
    f.write(f"Total customers      : {len(df):,}\n")
    f.write(f"Churned  (label=1)   : {df['churned'].sum():,} ({df['churned'].mean()*100:.1f}%)\n")
    f.write(f"Retained (label=0)   : {(df['churned']==0).sum():,} ({(df['churned']==0).mean()*100:.1f}%)\n")
    f.write(f"Train set            : {len(X_train):,} customers\n")
    f.write(f"Test  set            : {len(X_test):,}  customers\n")
    f.write(f"Split method         : Stratified 80/20\n\n")

    # How the hyperparameter search was configured and how long it ran.
    f.write("TUNING CONFIGURATION\n")
    f.write("-" * 40 + "\n")
    f.write(f"Method               : RandomizedSearchCV\n")
    f.write(f"Trials (n_iter)      : {n_trials}\n")
    f.write(f"CV strategy          : StratifiedKFold ({n_folds} folds)\n")
    f.write(f"Scoring metric       : AUC-ROC\n")
    f.write(f"Search time          : {tuning_time:.0f}s\n")
    f.write(f"Total combinations   : {total_combinations:,}\n\n")

    # Winning configuration, sorted by parameter name.
    f.write("BEST HYPERPARAMETERS FOUND\n")
    f.write("-" * 40 + "\n")
    for param, value in sorted(best_params.items()):
        f.write(f"  {param.replace('classifier__', ''):<25} : {value}\n")
    f.write("\n")

    # Per-fold AUCs of the tuned model and their spread.
    f.write("CV STABILITY — TUNED MODEL\n")
    f.write("-" * 40 + "\n")
    for i, s in enumerate(cv_scores, 1):
        f.write(f"  Fold {i} AUC : {s:.4f}\n")
    f.write(f"  Mean      : {cv_scores.mean():.4f}\n")
    f.write(f"  Std       : {cv_scores.std():.4f}  "
            f"({'stable' if cv_scores.std() < 0.02 else 'high variance'})\n\n")

    # Test-set metrics for the three models, one row per metric.
    f.write("EVALUATION RESULTS\n")
    f.write("-" * 40 + "\n")
    f.write(f"{'Metric':<28} {'RF Tuned':>10} {'RF Default':>12} {'Log Reg':>10}\n")
    for label, key in metric_rows:
        f.write(f"  {label:<26} {m_tuned[key]:>10.4f} "
                f"{m_default[key]:>12.4f} {m_lr[key]:>10.4f}\n")
    f.write("\n")

    # Ranks come from enumerate rather than the DataFrame index, so the
    # numbering is always 1..10 regardless of how importance_df is indexed.
    f.write("TOP 10 MOST IMPORTANT FEATURES — TUNED MODEL\n")
    f.write("-" * 40 + "\n")
    for rank, (_, row) in enumerate(importance_df.head(10).iterrows(), start=1):
        f.write(f"  {rank:>2}. {row['feature']:<30} {row['importance']:.4f}\n")

    # Index of the files written by this notebook.
    f.write("\nOUTPUT FILES\n")
    f.write("-" * 40 + "\n")
    f.write(f"  churn_predictions.csv     — churn scores for all {len(df):,} customers\n")
    f.write("  model_evaluation.png      — ROC curves, confusion matrix, tuning chart\n")
    f.write(f"  tuning_results.csv        — all {n_trials} RandomizedSearchCV trial results\n")
    f.write("  classification_report.txt — per-class precision, recall, F1\n")
    f.write("  model_summary.txt         — this file\n")

print(f"  Saved: {summary_path}")


STEP 13 — Saving model summary
  Saved: /content/model_outputs/model_summary.txt


In [16]:
# Closing banner: headline test AUCs, CV stability and output locations.
closing_rule = "=" * 60
print("\n" + closing_rule)
print("ALL STEPS COMPLETE")
print(closing_rule)

cv_mean = cv_scores.mean()
cv_std  = cv_scores.std()
print(f"\n  RF Default AUC   : {m_default['auc']:.4f}")
print(f"  RF Tuned   AUC   : {m_tuned['auc']:.4f}  ← final model")
print(f"  Log Reg    AUC   : {m_lr['auc']:.4f}")
print(f"  CV Mean AUC      : {cv_mean:.4f}  (std: {cv_std:.4f})")
print(f"\n  Best params      : {best_params}")
print(f"\n  Predictions      : {pred_path}")
print(f"  Charts           : {chart_path}")
print(f"  Tuning results   : {results_path}")
print(closing_rule)


ALL STEPS COMPLETE

  RF Default AUC   : 0.9746
  RF Tuned   AUC   : 0.9753  ← final model
  Log Reg    AUC   : 0.9746
  CV Mean AUC      : 0.9818  (std: 0.0035)

  Best params      : {'classifier__n_estimators': 200, 'classifier__min_samples_split': 20, 'classifier__min_samples_leaf': 10, 'classifier__max_features': 0.5, 'classifier__max_depth': None, 'classifier__class_weight': 'balanced_subsample', 'classifier__bootstrap': True}

  Predictions      : /content/model_outputs/churn_predictions.csv
  Charts           : /content/model_outputs/model_evaluation.png
  Tuning results   : /content/model_outputs/tuning_results.csv

  Next step: Load churn_predictions.csv into Power BI
