In [4]:
import pandas as pd
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt

churn = pd.read_csv('../data/churn.csv')


In [3]:
# Import libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report, roc_curve

# --- Data prep ---
X = encoded.drop('Churn_flag', axis=1)
y = encoded['Churn_flag']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Scale numeric features (optional for logistic)
scaler = StandardScaler()
X_train_scaled = X_train.copy()
X_test_scaled = X_test.copy()
num_cols = ['tenure', 'MonthlyCharges', 'TotalCharges']
X_train_scaled[num_cols] = scaler.fit_transform(X_train[num_cols])
X_test_scaled[num_cols] = scaler.transform(X_test[num_cols])

# --- 1️⃣ Logistic Regression (Baseline) ---
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train_scaled, y_train)
y_pred_lr = lr.predict(X_test_scaled)
y_prob_lr = lr.predict_proba(X_test_scaled)[:,1]

# --- 2️⃣ Random Forest (Advanced) ---
rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
y_prob_rf = rf.predict_proba(X_test)[:,1]

# --- Metrics ---
def evaluate_model(y_true, y_pred, y_prob, model_name):
    print(f"\n🔹 {model_name} Performance:")
    print("Accuracy:", round(accuracy_score(y_true, y_pred),3))
    print("Precision:", round(precision_score(y_true, y_pred),3))
    print("Recall:", round(recall_score(y_true, y_pred),3))
    print("F1-score:", round(f1_score(y_true, y_pred),3))
    print("ROC-AUC:", round(roc_auc_score(y_true, y_prob),3))

evaluate_model(y_test, y_pred_lr, y_prob_lr, "Logistic Regression")
evaluate_model(y_test, y_pred_rf, y_prob_rf, "Random Forest")

# Add churn probability for deployment insight
predicted = X_test.copy()
predicted['Actual Churn'] = y_test.values
predicted['Predicted Probability (RF)'] = y_prob_rf
predicted['Predicted Churn'] = y_pred_rf
predicted[['Predicted Probability (RF)', 'Predicted Churn']].head()


NameError: name 'encoded' is not defined