### Importing Libraries

In [15]:
import pandas as pd 
from sklearn import metrics 
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score 
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from imblearn.combine import SMOTEENN

# Step 0: Load Dataset
**Purpose:** Load the cleaned dataset (with dummy variables, no multicollinearity issues) for modeling.  
**Goal:** Prepare `df` as the main DataFrame for train/test splitting.  
**Expected Outcome:** A DataFrame ready for model building, all features numeric.


In [18]:
# Location of the pre-processed Telco churn CSV.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
DATASET_PATH = r"D:\Data Analytics\PORTFOLIO PROJECTS\End to End Machine Learning Project\self\telco_data_dummies.csv"

# Read the file into the main modelling DataFrame and preview the first rows.
df = pd.read_csv(DATASET_PATH)
df.head()

Unnamed: 0.1,Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Male,Partner_Yes,Dependents_Yes,PhoneService_Yes,MultipleLines_No phone service,...,Contract_Two year,PaperlessBilling_Yes,PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,0,29.85,29.85,0,0,1,0,0,1,...,0,1,0,1,0,0,0,0,0,0
1,1,0,56.95,1889.5,0,1,0,0,1,0,...,0,0,0,0,1,0,1,0,0,0
2,2,0,53.85,108.15,1,1,0,0,1,0,...,0,1,0,0,1,0,0,0,0,0
3,3,0,42.3,1840.75,0,1,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
4,4,0,70.7,151.65,1,0,0,0,1,0,...,0,1,0,1,0,0,0,0,0,0


In [20]:
# Remove the 'Unnamed: 0' column that came in with the CSV
# (presumably a saved row index — it is not a model feature).
#
# Rebinding `df` instead of using inplace=True, together with
# errors="ignore", keeps this cell idempotent: running it again after the
# column has already been removed no longer raises KeyError.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")
df.head()


Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Male,Partner_Yes,Dependents_Yes,PhoneService_Yes,MultipleLines_No phone service,MultipleLines_Yes,...,Contract_Two year,PaperlessBilling_Yes,PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,29.85,29.85,0,0,1,0,0,1,0,...,0,1,0,1,0,0,0,0,0,0
1,0,56.95,1889.5,0,1,0,0,1,0,0,...,0,0,0,0,1,0,1,0,0,0
2,0,53.85,108.15,1,1,0,0,1,0,0,...,0,1,0,0,1,0,0,0,0,0
3,0,42.3,1840.75,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0
4,0,70.7,151.65,1,0,0,0,1,0,0,...,0,1,0,1,0,0,0,0,0,0


# Step 1 – Train-Test Split
**Purpose:** Split data into training and testing sets while preserving class distribution.  
**Goal:** Create X_train, X_test, y_train, y_test with 80/20 split.  
**Expected Outcome:** Training and testing sets with same churn ratio as original data.




In [36]:
from sklearn.model_selection import train_test_split

# Target is the churn flag; every remaining column is used as a predictor
y = df["Churn"]
X = df.drop(columns=["Churn"])

# 80/20 hold-out split; stratifying on y keeps the churn ratio the same
# in the training and test parts, and the fixed seed makes it repeatable
split_kwargs = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

# Report the resulting sizes
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

# Report the class balance of each part as proportions
print("y_train distribution:\n", y_train.value_counts(normalize=True))
print("y_test distribution:\n", y_test.value_counts(normalize=True))


X_train shape: (5625, 34)
X_test shape: (1407, 34)
y_train distribution:
 Churn
0    0.734222
1    0.265778
Name: proportion, dtype: float64
y_test distribution:
 Churn
0    0.734186
1    0.265814
Name: proportion, dtype: float64


# Step 2: Baseline Dummy Classifier
**Purpose:** Establish a naive baseline by always predicting the majority class (non-churn).  
**Goal:** Evaluate minimum performance; any real model should outperform this.  

In [27]:

from sklearn.dummy import DummyClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Naive reference model: "most_frequent" always outputs the majority
# training label (0 = no churn here), regardless of the features
baseline = DummyClassifier(strategy="most_frequent", random_state=42).fit(X_train, y_train)

# Score the held-out test set
y_pred_baseline = baseline.predict(X_test)

# Summarise performance; zero_division=0 avoids warnings for the class
# that is never predicted
baseline_cm = confusion_matrix(y_test, y_pred_baseline)
baseline_report = classification_report(
    y_test, y_pred_baseline, digits=4, zero_division=0
)
print("Confusion Matrix:\n", baseline_cm)
print("\nClassification Report:\n", baseline_report)


Confusion Matrix:
 [[1033    0]
 [ 374    0]]

Classification Report:
               precision    recall  f1-score   support

           0     0.7342    1.0000    0.8467      1033
           1     0.0000    0.0000    0.0000       374

    accuracy                         0.7342      1407
   macro avg     0.3671    0.5000    0.4234      1407
weighted avg     0.5390    0.7342    0.6217      1407



**Interpretation:**  
Confusion Matrix:
[[1033    0]
 [ 374    0]]

- Predicts all customers as non-churn.  
- Correctly identifies non-churn (1033 TN), fails to identify churn (0 TP).  

Classification Report:
- Accuracy: 0.7342 → appears good due to class imbalance.  
- Recall for churn = 0 → model misses all churned customers.  
- F1-score for churn = 0 → poor performance for minority class.  

✅ Key takeaway:  
- DummyClassifier sets the **baseline performance**.  
- Any real model must **improve recall/F1 for churn**, not just accuracy.


# Step 3: Decision Tree Classifier
**Purpose:** Train a simple tree-based model to predict churn, capturing non-linear patterns.  
**Goal:** Compare with baseline DummyClassifier; check if it starts detecting churned customers.  
**Note:** Decision Trees can overfit easily, so later we may tune max_depth or min_samples_leaf.  



In [41]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Single decision tree; class_weight="balanced" re-weights the classes to
# compensate for the churn / non-churn imbalance, the seed fixes tie-breaking
tree_settings = {"random_state": 42, "class_weight": "balanced"}
dtree = DecisionTreeClassifier(**tree_settings)

# Fit on the training part only
dtree.fit(X_train, y_train)

# Score the held-out test set
y_pred_tree = dtree.predict(X_test)

# Summarise performance
tree_cm = confusion_matrix(y_test, y_pred_tree)
tree_report = classification_report(
    y_test, y_pred_tree, digits=4, zero_division=0
)
print("Confusion Matrix:\n", tree_cm)
print("\nClassification Report:\n", tree_report)


Confusion Matrix:
 [[837 196]
 [188 186]]

Classification Report:
               precision    recall  f1-score   support

           0     0.8166    0.8103    0.8134      1033
           1     0.4869    0.4973    0.4921       374

    accuracy                         0.7271      1407
   macro avg     0.6517    0.6538    0.6527      1407
weighted avg     0.7290    0.7271    0.7280      1407



 **Interpretation: Decision Tree Classifier**

Confusion Matrix: TN=837, FP=196, FN=188, TP=186

Classification Report Highlights:
- Accuracy: 0.7271 → slight drop vs baseline.
- Recall for churn: 0.4973 (~50% of churned customers detected)
- Precision for churn: 0.4869
- F1-score for churn: 0.4921 → moderate performance for minority class.

Key Takeaways:
- Decision Tree captures patterns to detect churn.
- Accuracy drops slightly, but recall/F1 for churn improves significantly.
- Model is actionable and sets the stage for further improvement with ensemble methods.


# Step 4a: Random Forest Classifier
**Purpose:** Train an ensemble of decision trees to improve churn prediction over a single Decision Tree.  
**Goal:** Capture complex patterns, reduce overfitting, and improve recall/F1 for churned customers.  
**Notes:** 
- Use `class_weight="balanced"` to handle class imbalance.
- Default hyperparameters are a starting point; later can tune max_depth, n_estimators, min_samples_leaf, etc.


In [48]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Untuned forest of 100 trees with balanced class weights and a fixed seed
forest_settings = {
    "n_estimators": 100,
    "random_state": 42,
    "class_weight": "balanced",
}
rf_model = RandomForestClassifier(**forest_settings)

# Fit on the training part only
rf_model.fit(X_train, y_train)

# Score the held-out test set
y_pred_rf = rf_model.predict(X_test)

# Summarise performance
rf_cm = confusion_matrix(y_test, y_pred_rf)
rf_report = classification_report(
    y_test, y_pred_rf, digits=4, zero_division=0
)
print("Confusion Matrix:\n", rf_cm)
print("\nClassification Report:\n", rf_report)


Confusion Matrix:
 [[915 118]
 [197 177]]

Classification Report:
               precision    recall  f1-score   support

           0     0.8228    0.8858    0.8531      1033
           1     0.6000    0.4733    0.5291       374

    accuracy                         0.7761      1407
   macro avg     0.7114    0.6795    0.6911      1407
weighted avg     0.7636    0.7761    0.7670      1407



**Interpretation: Random Forest Classifier**

**Confusion Matrix:**  
TN=915, FP=118, FN=197, TP=177

Classification Report Highlights:
- Accuracy: 0.7761 → overall correctness improved over Decision Tree (0.7271)  
- Recall for churn: 0.4733 → ~47% of churned customers detected  
- Precision for churn: 0.6000 → more reliable predictions, fewer false alarms  
- F1-score for churn: 0.5291 → better balance between precision and recall  

Macro / Weighted Average:  
- Macro avg F1 = 0.6911 → average performance across both classes  
- Weighted avg F1 = 0.7670 → overall performance weighted by class sizes  

Key Takeaways:
- Random Forest improves precision and F1-score for churn over Decision Tree.  
- Accuracy improved while maintaining reasonable detection of churn.  
- Model is more robust and actionable for real-world churn prediction.


**Improvement / Next Steps**

Even though the Random Forest performs better than a single Decision Tree, churn recall/F1 can still improve. Possible steps:  

- **Hyperparameter Tuning:**  
  - Adjust `n_estimators`, `max_depth`, `min_samples_leaf`, `min_samples_split` to reduce overfitting and improve minority class detection.  

- **Class Balancing Techniques:**  
  - Oversampling (SMOTE) or undersampling to give the model more churn examples.  

- **Threshold Adjustment:**  
  - Change the probability cutoff for predicting churn to increase recall.  

- **Try Stronger Models:**  
  - Gradient Boosting, XGBoost, or LightGBM may capture more complex patterns and further boost F1 for churn.  

> These steps help shift focus to **better detecting churn** while maintaining overall model reliability.


# Step 4b: Random Forest – Hyperparameter Tuning & Threshold Adjustment

**Purpose:** Improve Random Forest performance, especially **recall/F1 for churn**, using hyperparameter tuning and probability threshold adjustment.  
**Goal:** Capture more churned customers while keeping predictions reliable.  
**Notes:** 
- Tuning parameters like `n_estimators`, `max_depth`, `min_samples_leaf` can reduce overfitting.  
- Adjusting the classification threshold can favor predicting churn to improve recall.


In [60]:
from sklearn.model_selection import GridSearchCV

# Candidate values per hyperparameter (2 * 3 * 2 * 3 = 36 combinations)
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[5, 10, None],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2, 4],
)

# Exhaustive search with 3-fold cross-validation on the training data only.
# scoring='f1' ranks candidates by F1 of the positive (churn = 1) class;
# n_jobs=-1 uses all available cores.
base_forest = RandomForestClassifier(random_state=42, class_weight="balanced")
grid_rf = GridSearchCV(
    base_forest,
    param_grid,
    scoring='f1',
    cv=3,
    n_jobs=-1,
)
grid_rf.fit(X_train, y_train)

# Show the winning combination
print("Best Parameters:", grid_rf.best_params_)

# Keep the refitted best model and its test-set predictions
best_rf = grid_rf.best_estimator_
y_pred_best = best_rf.predict(X_test)


Best Parameters: {'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 200}


In [64]:
# Step 4b: Tuned Random Forest Classifier

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Hyperparameters copied by hand from the "Best Parameters" line printed by
# the grid search, plus the same seed and class weighting used in that search.
# NOTE(review): these go stale if the grid or the data changes — re-check
# against grid_rf.best_params_ after any re-run of the search.
best_params = dict(
    n_estimators=200,
    max_depth=10,
    min_samples_split=2,
    min_samples_leaf=4,
    random_state=42,
    class_weight='balanced',
)

# Build and fit the tuned forest on the training data
tuned_rf = RandomForestClassifier(**best_params)
tuned_rf.fit(X_train, y_train)

# Score the held-out test set
y_pred_tuned = tuned_rf.predict(X_test)

# Summarise performance
tuned_cm = confusion_matrix(y_test, y_pred_tuned)
tuned_report = classification_report(
    y_test, y_pred_tuned, digits=4, zero_division=0
)
print("Confusion Matrix:\n", tuned_cm)
print("\nClassification Report:\n", tuned_report)



Confusion Matrix:
 [[761 272]
 [ 85 289]]

Classification Report:
               precision    recall  f1-score   support

           0     0.8995    0.7367    0.8100      1033
           1     0.5152    0.7727    0.6182       374

    accuracy                         0.7463      1407
   macro avg     0.7073    0.7547    0.7141      1407
weighted avg     0.7974    0.7463    0.7590      1407



 **Random Forest – Hyperparameter Tuning (Interpretation)**

Confusion Matrix:
- **True Negatives (TN) = 761** → correctly predicted non-churn  
- **False Positives (FP) = 272** → predicted churn but actually non-churn  
- **False Negatives (FN) = 85** → predicted non-churn but actually churn  
- **True Positives (TP) = 289** → correctly predicted churn  

**Classification Report Highlights:**  
- Accuracy: 0.7463 → lower than untuned RF (0.7761); accuracy is traded for much higher churn recall  
- Recall (Churn): 0.7727 → most churners correctly detected  
- Precision (Churn): 0.5152 → moderate; some false positives exist  
- F1-score (Churn): 0.6182 → balanced performance  
- Weighted F1: 0.7590 → good overall metric considering class imbalance  

**Key Takeaways:**  
- Tuned RF captures **more churners** than baseline and untuned RF.  
- High recall indicates the model is effective in detecting churn.  
- F1-score shows **moderate performance**, precision trade-off is acceptable.  
- Ready for deployment: interpretable, stable, and actionable.



### Step 5: Boosting Model (XGBoost)

**Purpose:**  
Use gradient boosting to capture complex patterns missed by Random Forest; sequentially focus on misclassified samples.

**Goal:**  
Improve overall accuracy and weighted F1 while maintaining high recall for churn.

**Notes:**  
- Parameters: `n_estimators=200`, `max_depth=6`, `learning_rate=0.1`, `scale_pos_weight` to handle class imbalance.  
- Captures non-linear interactions and subtle patterns in the data.  
- Confusion matrix and classification report show improved weighted performance; recall for churn remains high.  
- Slightly more complex and less interpretable than Random Forest; still strong for deployment/demo purposes.


In [70]:
# Install XGBoost 
!pip install xgboost

Collecting xgboost
  Downloading xgboost-3.0.4-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.4-py3-none-win_amd64.whl (56.8 MB)
   ---------------------------------------- 0.0/56.8 MB ? eta -:--:--
    --------------------------------------- 0.8/56.8 MB 6.7 MB/s eta 0:00:09
   - -------------------------------------- 2.1/56.8 MB 5.9 MB/s eta 0:00:10
   -- ------------------------------------- 3.4/56.8 MB 5.9 MB/s eta 0:00:10
   --- ------------------------------------ 4.5/56.8 MB 6.0 MB/s eta 0:00:09
   ---- ----------------------------------- 6.0/56.8 MB 6.1 MB/s eta 0:00:09
   ---- ----------------------------------- 7.1/56.8 MB 6.1 MB/s eta 0:00:09
   ----- ---------------------------------- 8.4/56.8 MB 6.0 MB/s eta 0:00:09
   ------ --------------------------------- 9.7/56.8 MB 6.1 MB/s eta 0:00:08
   ------- -------------------------------- 11.0/56.8 MB 6.2 MB/s eta 0:00:08
   -------- ------------------------------- 12.3/56.8 MB 6.2 MB/s eta 0:00:08
   --------

In [76]:
# Step 5: XGBoost Classifier
import xgboost as xgb
from sklearn.metrics import classification_report, confusion_matrix

# Class imbalance in the training labels: number of non-churn rows (0)
# divided by number of churn rows (1); passed as scale_pos_weight so the
# positive class is weighted up accordingly
n_negative = len(y_train[y_train == 0])
n_positive = len(y_train[y_train == 1])

# Basic boosting configuration with a fixed seed
xgb_settings = dict(
    n_estimators=200,
    max_depth=6,
    learning_rate=0.1,
    scale_pos_weight=(n_negative / n_positive),
    random_state=42,
    eval_metric='logloss',
)
xgb_model = xgb.XGBClassifier(**xgb_settings)

# Fit on the training part only
xgb_model.fit(X_train, y_train)

# Score the held-out test set
y_pred_xgb = xgb_model.predict(X_test)

# Summarise performance
xgb_cm = confusion_matrix(y_test, y_pred_xgb)
xgb_report = classification_report(
    y_test, y_pred_xgb, digits=4, zero_division=0
)
print("Confusion Matrix:\n", xgb_cm)
print("\nClassification Report:\n", xgb_report)


Confusion Matrix:
 [[779 254]
 [ 95 279]]

Classification Report:
               precision    recall  f1-score   support

           0     0.8913    0.7541    0.8170      1033
           1     0.5235    0.7460    0.6152       374

    accuracy                         0.7520      1407
   macro avg     0.7074    0.7501    0.7161      1407
weighted avg     0.7935    0.7520    0.7634      1407



**Boosting Model (XGBoost) – Interpretation**

Confusion Matrix:
- **True Negatives (TN) = 779** → correctly predicted non-churn  
- **False Positives (FP) = 254** → predicted churn but actually non-churn  
- **False Negatives (FN) = 95** → predicted non-churn but actually churn  
- **True Positives (TP) = 279** → correctly predicted churn  

**Classification Report Highlights:**  
- Accuracy: 0.7520 → slightly higher than Tuned RF  
- Recall (Churn): 0.7460 → most churners detected  
- Precision (Churn): 0.5235 → moderate; some false positives exist  
- F1-score (Churn): 0.6152 → balanced performance  
- Weighted F1: 0.7634 → slightly above Tuned RF (0.7590) but just below default RF (0.7670)  

**Key Takeaways:**  
- XGBoost captures **complex patterns** better than RF.  
- Recall remains high → effective for churn detection.  
- F1-score indicates **moderate performance**; precision trade-off is acceptable.  
- Slightly more complex and less interpretable than Random Forest; strong candidate for deployment/demo.



# Step 6: Model Comparison

In [87]:


from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd


def churn_metrics(y_true, y_pred):
    """Compute the comparison metrics for one model's predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels (1 = churn).
    y_pred : array-like
        Predicted labels for the same rows.

    Returns
    -------
    dict
        Overall accuracy plus precision, recall and F1 for the churn class
        (pos_label=1). zero_division=0 makes a model that never predicts
        churn score 0 instead of emitting a warning.
    """
    return {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision_Churn': precision_score(y_true, y_pred, pos_label=1, zero_division=0),
        'Recall_Churn': recall_score(y_true, y_pred, pos_label=1, zero_division=0),
        'F1_Churn': f1_score(y_true, y_pred, pos_label=1, zero_division=0)
    }


# Test-set predictions of every model trained so far, keyed by display name.
# One mapping + one helper replaces five copy-pasted metric dictionaries, so
# adding a model is a one-line change and the metric set cannot drift apart.
predictions_by_model = {
    'Dummy': y_pred_baseline,
    'Decision Tree': y_pred_tree,
    'Random Forest': y_pred_rf,
    'Tuned RF': y_pred_tuned,
    'XGBoost': y_pred_xgb,
}

# Dictionary to store metrics (model name -> metric name -> value)
model_metrics = {
    model_name: churn_metrics(y_test, model_preds)
    for model_name, model_preds in predictions_by_model.items()
}

# One row per model, best churn F1 first
model_comparison_df = pd.DataFrame(model_metrics).T
model_comparison_df = model_comparison_df.sort_values(by='F1_Churn', ascending=False)
model_comparison_df


Unnamed: 0,Accuracy,Precision_Churn,Recall_Churn,F1_Churn
Tuned RF,0.746269,0.515152,0.772727,0.618182
XGBoost,0.751955,0.523452,0.745989,0.615215
Random Forest,0.776119,0.6,0.473262,0.529148
Decision Tree,0.727079,0.486911,0.497326,0.492063
Dummy,0.734186,0.0,0.0,0.0



**Interpretation:**
- **Tuned Random Forest** achieves the best balance between detecting churners (recall = 0.7727) and overall performance (F1 = 0.6182).  
- **XGBoost** has slightly higher accuracy (0.7520) but slightly lower F1 for churn, making it comparable to Tuned RF.  
- **Default Random Forest** prioritizes precision over recall → fewer false positives, but misses many churners.  
- **Decision Tree** shows moderate performance but is lower than ensemble models.  
- **Dummy Classifier** performs poorly for churn (as expected), serving as the baseline.  

**Key Takeaways:**
- **Tuned Random Forest** is the most suitable model for deployment, as it balances **recall and F1** for churners.  
- XGBoost is a strong alternative if slight accuracy gain is preferred.  
- Ensemble models clearly outperform single decision trees and baseline.  


# Step 7: Save and Load Final Model using joblib

In [95]:
import joblib

# Persist the tuned Random Forest to a file in the current working directory
model_filename = 'final_churn_model.pkl'
joblib.dump(tuned_rf, model_filename)

# Read the model back, as a later session or deployment would.
# NOTE: joblib files are pickle-based — only load files you created or trust.
loaded_model = joblib.load(model_filename)

# Sanity check: the reloaded model's test-set accuracy should match the
# accuracy reported for the tuned Random Forest
model_score = loaded_model.score(X_test, y_test)
print("Loaded model accuracy on test set:", model_score)

Loaded model accuracy on test set: 0.746268656716418
