In [2]:
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load Data
df = pd.read_csv("../data/Telco_customer_churn.csv")

# Drop Leakage & Noise
# Identifier, constant, geographic and churn-outcome columns are removed
# before modelling.
# NOTE(review): the full CSV header is not visible from this cell; if the file
# also carries model-derived fields (e.g. a churn score), those would leak the
# target as well -- confirm against the file and extend this list if so.
drop_cols = [
    'CustomerID', 'Count', 'Country', 'State', 'City', 'Zip Code',
    'Lat Long', 'Latitude', 'Longitude', 'Churn Reason', 'Churn Value',
]

# Clean & Rename
column_aliases = {
    'Tenure Months': 'Tenure',
    'Monthly Charges': 'MonthlyCharges',
    'Total Charges': 'TotalCharges',
    'Churn Label': 'Churn',
}
df = df.drop(columns=drop_cols).rename(columns=column_aliases)
df = df.assign(
    # Binary target: 'Yes' -> 1, 'No' -> 0 (any other label becomes NaN).
    Churn=df['Churn'].map({'Yes': 1, 'No': 0}),
    # Unparseable values are coerced to NaN and then replaced by 0.
    TotalCharges=pd.to_numeric(df['TotalCharges'], errors='coerce').fillna(0),
)

# Feature Engineering
def tenure_group(tenure):
    """Return the tenure bucket label for a tenure expressed in months.

    Upper bounds are inclusive: values up to 12 map to '0-1 Year', up to 24
    to '1-2 Years', up to 48 to '2-4 Years'. Anything that is not at or below
    48 (including NaN, which fails every comparison) maps to '4+ Years'.
    """
    buckets = (
        (12, '0-1 Year'),
        (24, '1-2 Years'),
        (48, '2-4 Years'),
    )
    # Bounds are ascending, so the first one that fits is the right bucket.
    for upper_bound, label in buckets:
        if tenure <= upper_bound:
            return label
    return '4+ Years'

# Bucket each customer's tenure into a categorical label, append it as the
# TenureGroup column, then one-hot encode the frame. drop_first=True omits the
# first level of every encoded column so the dummies are not redundant.
tenure_labels = df['Tenure'].apply(tenure_group)
df = pd.get_dummies(df.assign(TenureGroup=tenure_labels), drop_first=True)

In [3]:
# --- CELL: DAY 3 & 4 (Training & SHAP) ---
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
import joblib
import pandas as pd

# 1. Prepare Data
# Features are every column except the binary 'Churn' target.
X = df.drop('Churn', axis=1)
y = df['Churn']
# 80/20 split, stratified on the target, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# 2. Hyperparameter Tuning
# Small grid searched with 3-fold cross-validation; candidates are ranked by
# recall, and the refit best estimator is kept.
param_grid = {'n_estimators': [100, 200], 'max_depth': [10, None]}
grid = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=3, scoring='recall')
grid.fit(X_train, y_train)
best_model = grid.best_estimator_

# 3. Save Model for VS Code Dashboard
# joblib.dump does not create missing parent directories; make sure the target
# folder exists so a fresh checkout does not fail after the grid search has
# already run.
os.makedirs('../models', exist_ok=True)
joblib.dump(best_model, '../models/churn_model.pkl')
# The training column order is stored alongside the model so consumers can
# line their input up with what the model was fitted on.
joblib.dump(list(X.columns), '../models/model_columns.pkl')

# 4. SHAP Explainability (Wrap in try-except to avoid crash)
# Only ImportError is caught: a missing or incompatible shap/numba/numpy
# install should not stop the notebook, but real errors must still surface.
try:
    import shap
    explainer = shap.TreeExplainer(best_model)
    shap_values = explainer.shap_values(X_test)

    # The return shape depends on the installed SHAP version:
    #   * older releases: a list with one (n_samples, n_features) array per class
    #   * newer releases: one (n_samples, n_features, n_classes) array
    # Indexing the newer array with [1] would select the second *sample*
    # instead of the positive class, so pick class 1 explicitly in each case.
    if isinstance(shap_values, list):
        churn_shap_values = shap_values[1]
    elif getattr(shap_values, 'ndim', 0) == 3:
        churn_shap_values = shap_values[:, :, 1]
    else:
        # Single-output result: already (n_samples, n_features).
        churn_shap_values = shap_values

    # Plotting
    shap.summary_plot(churn_shap_values, X_test)
except ImportError as e:
    print(f"SHAP Error: {e}. Please run 'pip install \"numpy<2.0\"' and restart kernel.")

SHAP Error: Numba needs NumPy 2.3 or less. Got NumPy 2.4.. Please run 'pip install "numpy<2.0"' and restart kernel.
