## Import Required Libraries

In [1]:
import numpy as np
import pandas as pd 
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.dummy import DummyClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_predict, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report, make_scorer
import joblib


In [2]:
import warnings
warnings.filterwarnings('ignore')

## Load The Processed Dataset

In [4]:
# Load the Processed Data
X_train= pd.read_csv('../data/X_train_processed.csv')
X_test= pd.read_csv('../data/X_test_processed.csv')
y_train= pd.read_csv('../data/y_train.csv')
y_test= pd.read_csv('../data/y_test.csv')

# Convert y_train and y_test from Dataframes to Series
y_train= y_train.squeeze()
y_test= y_test.squeeze()

print("Processed data loaded successfully!")
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")


Processed data loaded successfully!
X_train shape: (5625, 46)
y_train shape: (5625,)
X_test shape: (1407, 46)
y_test shape: (1407,)


## Training The Baseline Model

### Baseline Model with DummyClassifier

In machine learning, establishing a **baseline** is crucial. A baseline is a simple model that acts as a **reference point**.  
If more complex models cannot outperform this baseline, they provide little to no added value.

---

### Why DummyClassifier?
- The dataset is **imbalanced**, with most customers not churning.  
- A `DummyClassifier` that always predicts **"No Churn"** will still achieve a relatively high **accuracy**.  
- This performance represents the **minimum benchmark** our real models must surpass to be considered effective.  

---

In [6]:
# Create an instance and train the DummyClassifier model
dummy_clf= DummyClassifier(strategy='most_frequent')
dummy_clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred_dummy= dummy_clf.predict(X_test)

# Evaluate the model
print("Dummy Classifier Performance: ")
report_dict= classification_report(y_test, y_pred_dummy, output_dict=True)
report_df= pd.DataFrame(report_dict).T
display(report_df)


Dummy Classifier Performance: 


Unnamed: 0,precision,recall,f1-score,support
0,0.734186,1.0,0.846721,1033.0
1,0.0,0.0,0.0,374.0
accuracy,0.734186,0.734186,0.734186,0.734186
macro avg,0.367093,0.5,0.423361,1407.0
weighted avg,0.539029,0.734186,0.621651,1407.0



---

### Key Takeaways
- The classifier achieves **73.4% accuracy** by predicting only the majority class (No Churn).  
- However, it completely **fails to identify churned customers (Class 1)** — precision, recall, and F1-score are **zero** for this class.  
- This highlights why **accuracy alone is misleading** on imbalanced datasets, and why we must focus on other metrics (recall, F1-score, precision) for meaningful evaluation.

## Train the Models

In [None]:
# Iniatilize the models with class imbalance handling
log_reg_model= LogisticRegression(random_state=42, class_weight='balanced')
dt_model= DecisionTreeClassifier(random_state=42)
rf_model= RandomForestClassifier(random_state=42, class_weight='balanced')

# scale_pos_weight is the ratio of negative class instances to positive class instances
scale_pos_weight_value = (y_train == 0).sum()/(y_train == 1).sum()
xgb_model= XGBClassifier(random_state=42, scale_pos_weight=scale_pos_weight_value, eval_metric='logloss', use_label_encoder=False)
svm_model= SVC(random_state=42, class_weight='balanced', probability=True) # probability=True is needed for ROC-AUC later

# Dictionary of all models to train
models_to_train= {
    'Logistic Regression': log_reg_model,
    'Decision Tree': dt_model,
    'Random Forest': rf_model,
    'XGBoost': xgb_model,
    'SVM': svm_model
}

# Dictionary to store models
models= {}

print("Training the models.....")

for name, model in models_to_train.items():
    model.fit(X_train, y_train)
    models[name]= model
    print(f"-{name} trained")
 
print("\nAll the models trained successfully!")


Training the models.....
-Logistic Regression trained
-Decision Tree trained
-Random Forest trained
-XGBoost trained
-SVM trained

All the models trained successfully!


## Evaluate the Models

In [9]:
cv= StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Dictionary to store results
cv_results= {}
detailed_cv_reports= {}

print("Evaluating models using StratifiedKFold CV.....")

for name, model in models.items():
    y_pred_cv= cross_val_predict(model, X_train, y_train, cv=cv, method='predict')
    y_proba_cv= cross_val_predict(model, X_train, y_train, cv=cv, method='predict_proba')[:, 1] \
                if hasattr(model, 'predict_proba') else None
    
    # Overall Metrics
    accuracy= accuracy_score(y_train, y_pred_cv)
    precision= precision_score(y_train, y_pred_cv, zero_division=0)
    recall= recall_score(y_train, y_pred_cv, zero_division=0)
    f1= f1_score(y_train, y_pred_cv, zero_division=0)
    roc_auc= roc_auc_score(y_train, y_proba_cv) if y_proba_cv is not None else 'NA'

    # Churn Class Metrics
    report= classification_report(y_train, y_pred_cv, output_dict=True, zero_division=0)
    churn_precision= report['1']['precision']
    churn_recall= report['1']['recall']
    churn_f1= report['1']['f1-score']

    # Store the results
    cv_results[name]= {
        'Accuracy': accuracy,
        'Precision (Overall)': precision,
        'Recall (Overall)': recall,
        'F1-Score (Overall)': f1,
        'ROC-AUC': roc_auc,
        'Precision (Churn)': churn_precision,
        'Recall (Churn)': churn_recall,
        'F1-Score (Churn)': churn_f1

    }

    # Store the detailed Per-Class Report
    detailed_cv_reports[name]= pd.DataFrame(report).T

    print(f"-{name} evaluated on CV")

# Summary DataFrame
cv_results_df= pd.DataFrame(cv_results).T
print("\nCross-validated Performance on Training Data: ")
display(cv_results_df.sort_values(by='F1-Score (Churn)', ascending=False))


Evaluating models using StratifiedKFold CV.....
-Logistic Regression evaluated on CV
-Decision Tree evaluated on CV
-Random Forest evaluated on CV
-XGBoost evaluated on CV
-SVM evaluated on CV

Cross-validated Performance on Training Data: 


Unnamed: 0,Accuracy,Precision (Overall),Recall (Overall),F1-Score (Overall),ROC-AUC,Precision (Churn),Recall (Churn),F1-Score (Churn)
Logistic Regression,0.751822,0.521531,0.802007,0.632051,0.845603,0.521531,0.802007,0.632051
SVM,0.7504,0.520106,0.787291,0.626397,0.826055,0.520106,0.787291,0.626397
XGBoost,0.772622,0.562284,0.652174,0.603902,0.82703,0.562284,0.652174,0.603902
Random Forest,0.788622,0.634921,0.481605,0.547737,0.822896,0.634921,0.481605,0.547737
Decision Tree,0.727111,0.486842,0.494983,0.490879,0.652752,0.486842,0.494983,0.490879



---

### Observations

- **DummyClassifier (Baseline)** achieved ~73.4% accuracy by always predicting “No Churn”.  
- **Logistic Regression** achieved the **highest recall (0.802)** for churn customers, making it strong at identifying churners, though at the cost of lower precision.  
- **SVM** performed similarly to Logistic Regression, with balanced precision and recall but slightly lower overall metrics.  
- **XGBoost** provided a better balance between precision and recall compared to Logistic Regression but did not outperform it in F1-score for churn.  
- **Random Forest** achieved the **highest accuracy (0.789)** and precision but suffered from **low recall (0.482)**, meaning it misses many churners.  
- **Decision Tree** had the weakest overall performance, with low F1-scores and ROC-AUC.  

---

### Best-Performing Models

- **Logistic Regression** – Best for **identifying churners** (high recall and F1 for churn, ROC-AUC = 0.846).  
- **Random Forest** – Best for **overall accuracy and precision**, but weaker at capturing churners.  
- **XGBoost** – Offers a **middle ground**, with balanced performance across metrics and competitive ROC-AUC.  

---

**Key Takeaways:**  
- **Logistic Regression** excels at **detecting churners**, which is often more important than overall accuracy in churn prediction.  
- **Random Forest** is best if the goal is **high overall accuracy**.  
- **XGBoost** gives a **balanced approach**, making it a strong candidate for further hyperparameter tuning.  
- The **DummyClassifier** highlights that naive accuracy can be misleading when the dataset is imbalanced.

---

## Broad HyperParameter Search

In [10]:
# Parameter Grids
param_dist_lr= {
    "penalty": ["l1", "l2"],
    "C": np.logspace(-3, 3, 10),
    "solver": ["liblinear", "saga"],
    "max_iter": [100, 500, 1000],

}

param_dist_rf= {
    "n_estimators": [100, 200, 500, 1000],
    "max_depth": [None, 5, 10, 20 ,50],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4, 10],
    "max_features": ["sqrt", "log2", None],
    "bootstrap": [True, False] 

}

param_dist_xgb= {
    "n_estimators": [100, 300, 500, 1000],
    "learning_rate": [0.001, 0.01, 0.05, 0.1, 0.2],
    "max_depth": [3, 5, 7, 10],
    "min_child_weight": [1, 3, 5, 10],
    "subsample": [0.5, 0.7, 0.8, 1.0],
    "colsample_bytree": [0.5, 0.7, 0.8, 1.0],
    "gamma": [0, 0.1, 0.3, 0.5, 1],
    "reg_alpha": [0, 0.01, 0.1, 1, 10],
    "reg_lambda": [0, 0.01, 0.1, 1, 10]

}

#Models
models_to_tune= {
    'Logistic Regression': LogisticRegression(random_state=42, class_weight='balanced'),
    'Random Forest': RandomForestClassifier(random_state=42, class_weight='balanced'),
    'XGBoost': XGBClassifier(random_state=42, eval_metric= 'logloss', use_label_encoder=False, verbosity=0)

}

param_dists= {
    'Logistic Regression': param_dist_lr,
    'Random Forest': param_dist_rf,
    'XGBoost': param_dist_xgb

}

# Scores
scorers= {
    "f1_churn": make_scorer(f1_score, pos_label=1),
    "roc_auc": "roc_auc"
}

# Result containers
best_models, best_scorers, best_params= {}, {}, {}
all_results= {}

cv_folds= StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for name, model in models_to_tune.items():
    print(f"\nPerforming broad search for {name}.....")

    if name=='XGBoost':
        scale_pos_weight_value= (y_train==0).sum()/(y_train==1).sum()
        model.set_params(scale_pos_weight= scale_pos_weight_value)

    random_search= RandomizedSearchCV(
        estimator= model,
        param_distributions= param_dists[name],
        n_iter= 50,
        cv= cv_folds,
        scoring= scorers,
        refit= "f1_churn",
        n_jobs= -1,
        verbose= 1,
        random_state=42, 
        return_train_score= True

    )

    random_search.fit(X_train, y_train)

    # Store Best
    best_models[name]= random_search.best_estimator_
    best_scorers[name]= {
        "Best F1 (Churn)": random_search.cv_results_["mean_test_f1_churn"][random_search.best_index_],
        "Best ROC_AUC": random_search.cv_results_["mean_test_roc_auc"][random_search.best_index_]

    }
    best_params[name]= random_search.best_params_
    all_results[name]= pd.DataFrame(random_search.cv_results_)

    print(f"Best params: {random_search.best_params_}")
    print(f"Best CV F1-Score (Churn): {best_scorers[name]['Best F1 (Churn)']:.4f}")
    print(f"Best CV ROC-AUC: {best_scorers[name]['Best ROC_AUC']:.4f}")
    print("-"*50)



Performing broad search for Logistic Regression.....
Fitting 5 folds for each of 50 candidates, totalling 250 fits
Best params: {'solver': 'liblinear', 'penalty': 'l1', 'max_iter': 1000, 'C': np.float64(0.46415888336127775)}
Best CV F1-Score (Churn): 0.6329
Best CV ROC-AUC: 0.8457
--------------------------------------------------

Performing broad search for Random Forest.....
Fitting 5 folds for each of 50 candidates, totalling 250 fits
Best params: {'n_estimators': 100, 'min_samples_split': 2, 'min_samples_leaf': 10, 'max_features': 'sqrt', 'max_depth': 10, 'bootstrap': True}
Best CV F1-Score (Churn): 0.6406
Best CV ROC-AUC: 0.8488
--------------------------------------------------

Performing broad search for XGBoost.....
Fitting 5 folds for each of 50 candidates, totalling 250 fits
Best params: {'subsample': 0.5, 'reg_lambda': 1, 'reg_alpha': 0.01, 'n_estimators': 1000, 'min_child_weight': 3, 'max_depth': 7, 'learning_rate': 0.001, 'gamma': 1, 'colsample_bytree': 0.5}
Best CV F1-

## Fine_Tuning HyperParameters

In [11]:
# Fine tuning Logistic Regression
param_grid_lr_fine= {
    "penalty": ["l1", "l2"],
    "C": np.linspace(0.1, 1, 10),
    "solver": ["liblinear", "saga"],
    "max_iter": [500, 1000, 1500]

}

grid_search_lr= GridSearchCV(
    estimator= LogisticRegression(random_state=42, class_weight='balanced'),
    param_grid=param_grid_lr_fine,
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
    scoring=scorers,
    refit='f1_churn',
    n_jobs=-1,
    verbose=1,
    return_train_score=True

)

# Result containers
best_models_tuned, best_scorers_tuned, best_params_tuned= {}, {}, {}
all_results_tuned= {}

print("Performing fine-tuning for Logistic Regression.....")
grid_search_lr.fit(X_train, y_train)

# Store best results
best_models_tuned['Logistic Regression (Tuned)']= grid_search_lr.best_estimator_
best_scorers_tuned['Logistic Regression (Tuned)']= {
    "Best F1 (Churn)": grid_search_lr.cv_results_["mean_test_f1_churn"][grid_search_lr.best_index_],
    "Best ROC_AUC": grid_search_lr.cv_results_["mean_test_roc_auc"][grid_search_lr.best_index_]
}
best_params_tuned['Logistic Regression (Tuned)']= grid_search_lr.best_params_

print(f"\nBest parameters for Tuned Logistic Regression: {grid_search_lr.best_params_}")
print(f"Best CV F1-Score (Churn): {best_scorers_tuned['Logistic Regression (Tuned)']["Best F1 (Churn)"]:.4f}" )
print(f"Best CV ROC-AUC: {best_scorers_tuned['Logistic Regression (Tuned)']["Best ROC_AUC"]:.4f}")


Performing fine-tuning for Logistic Regression.....
Fitting 5 folds for each of 120 candidates, totalling 600 fits

Best parameters for Tuned Logistic Regression: {'C': np.float64(0.8), 'max_iter': 500, 'penalty': 'l1', 'solver': 'liblinear'}
Best CV F1-Score (Churn): 0.6333
Best CV ROC-AUC: 0.8458


In [12]:
# Fine Tuning Random Forest
param_grid_rf_fine= {
    "n_estimators": [100, 200, 300],
    "max_depth": [8, 9, 10, 11, 12, 15],
    "min_samples_split": [2, 3, 5],
    "min_samples_leaf": [8, 9, 10, 11, 12],
    "max_features": ["sqrt"],
    "bootstrap": [True]
}

grid_search_rf= GridSearchCV(
    estimator= RandomForestClassifier(random_state=42, class_weight='balanced'),
    param_grid= param_grid_rf_fine,
    cv= StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
    scoring= scorers,
    refit= "f1_churn",
    n_jobs=-1,
    verbose=1,
    return_train_score=True
)

print("Performing fine-tuning for Random Forest.....")
grid_search_rf.fit(X_train, y_train)

# Store best results
best_models_tuned['Random Forest (Tuned)']= grid_search_rf.best_estimator_
best_scorers_tuned['Random Forest (Tuned)']= {
    "Best F1 (Churn)": grid_search_rf.cv_results_["mean_test_f1_churn"][grid_search_rf.best_index_],
    "Best ROC_AUC": grid_search_rf.cv_results_["mean_test_roc_auc"][grid_search_rf.best_index_]
}
best_params_tuned['Random Forest (Tuned)']= grid_search_rf.best_params_

print(f"\nBest parameters for Tuned Random Forest: {grid_search_rf.best_params_}")
print(f"Best CV F1-Score (Churn): {best_scorers_tuned['Random Forest (Tuned)']["Best F1 (Churn)"]:.4f}")
print(f"Best CV ROC-AUC: {best_scorers_tuned['Random Forest (Tuned)']["Best ROC_AUC"]:.4f}")


Performing fine-tuning for Random Forest.....
Fitting 5 folds for each of 270 candidates, totalling 1350 fits

Best parameters for Tuned Random Forest: {'bootstrap': True, 'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 10, 'min_samples_split': 2, 'n_estimators': 100}
Best CV F1-Score (Churn): 0.6406
Best CV ROC-AUC: 0.8488


In [13]:
# Fine Tuning XGBoost
param_dist_xgb_fine= {
    "n_estimators": [800, 1000, 1200],
    "learning_rate": [0.001, 0.005, 0.01],
    "max_depth": [6, 7, 8],
    "min_child_weight": [2, 3, 4],
    "subsample": [0.4, 0.5, 0.6],
    "colsample_bytree": [0.4, 0.5, 0.6],
    "gamma": [0.5, 1, 1.5],
    "reg_alpha": [0.005, 0.01, 0.05],
    "reg_lambda": [0.5, 1, 2]

}

scale_pos_weight_value= (y_train==0).sum()/(y_train==1).sum()

random_search_xgb= RandomizedSearchCV(
    estimator=XGBClassifier(random_state=42,scale_pos_weight= scale_pos_weight_value, eval_metric='logloss', use_label_encoder=False),
    param_distributions=param_dist_xgb_fine,
    n_iter=200,
    cv= StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
    scoring=scorers,
    refit="f1_churn",
    n_jobs=-1,
    verbose=1,
    return_train_score=True,
    random_state=42
)

print("Performing fine-tuining for XGBoost.....")
random_search_xgb.fit(X_train, y_train)

# Store best results
best_models_tuned['XGBoost (Tuned)']= random_search_xgb.best_estimator_
best_scorers_tuned['XGBoost (Tuned)']= {
    "Best F1 (Churn)": random_search_xgb.cv_results_["mean_test_f1_churn"][random_search_xgb.best_index_],
    "Best ROC_AUC": random_search_xgb.cv_results_["mean_test_roc_auc"][random_search_xgb.best_index_]
}
best_params_tuned['XGBoost (Tuned)']= random_search_xgb.best_params_

print(f"\nBest parameters for Tuned XGBoost: {random_search_xgb.best_params_}")
print(f"Best CV F1-Score (Churn): {best_scorers_tuned['XGBoost (Tuned)']["Best F1 (Churn)"]:.4f}")
print(f"Best CV ROC-AUC: {best_scorers_tuned['XGBoost (Tuned)']["Best ROC_AUC"]:.4f}")

Performing fine-tuining for XGBoost.....
Fitting 5 folds for each of 200 candidates, totalling 1000 fits

Best parameters for Tuned XGBoost: {'subsample': 0.6, 'reg_lambda': 0.5, 'reg_alpha': 0.005, 'n_estimators': 800, 'min_child_weight': 3, 'max_depth': 7, 'learning_rate': 0.005, 'gamma': 1, 'colsample_bytree': 0.4}
Best CV F1-Score (Churn): 0.6418
Best CV ROC-AUC: 0.8481



---

### Key Insights

- **Logistic Regression**: Performs decently with strong ROC-AUC but lags behind tree-based models in F1, making it more suitable as a baseline or for interpretability.

- **Random Forest**: Offers balanced performance with good F1 and ROC-AUC, showing that controlled tree depth and regularization prevent overfitting.

- **XGBoost**: Achieves the best overall performance, slightly outperforming Random Forest in F1 while maintaining high ROC-AUC, making it the most effective model for churn prediction.

---

### Performance Evolution
**Logistic Regression**

- Initial Training: Strong ROC-AUC (0.8456) and decent F1 (0.6321), indicating it’s a reliable baseline.

- Broad Search: Achieved small improvements in F1 (0.6329) and ROC-AUC (0.8457), showing robustness but limited capacity to capture complex churn patterns.

- Fine-Tuning: Results stayed consistent (F1= 0.6333, ROC-AUC= 0.8458), suggesting diminishing returns from additional tuning.

**Random Forest**

- Initial Training: Produced balanced results (F1= 0.5477, ROC-AUC= 0.8229), but recall on churn cases was weaker.

- Broad Search: Significant boost in F1 (0.6406) and ROC-AUC (0.8488), showing that hyperparameter control improves generalization.

- Fine-Tuning: Performance plateaued (same F1= 0.6406, ROC-AUC= 0.8488), indicating the model already reached an optimal region.

**XGBoost**

- Initial Training: Started lower than Random Forest in F1 (0.6039) but with decent ROC-AUC (0.8270).

- Broad Search: Clear gains (F1= 0.6398, ROC-AUC= 0.8495), proving tuning has strong impact on boosting methods.

- Fine-Tuning: Marginal improvements in F1 (0.6418 vs. 0.6398) but slight dip in ROC-AUC (0.8481 vs. 0.8495), suggesting the model is near its performance ceiling.

---

### Overall Insight
Logistic Regression started as the strongest baseline with high recall and competitive ROC-AUC, making it effective for identifying churners. Random Forest showed clear gains from hyperparameter tuning. After fine-tuning, however, XGBoost slightly outperformed the others, offering the best balance of F1 and ROC-AUC.

## Retrain Final Models

In [14]:
# Refit the best models of all the fine-tuned models on complete training dataset
print("Retraining final models on the complete training dataset with best parameters....")
for name, model in best_models_tuned.items():
    model.fit(X_train, y_train)
    print(f"-{name} retrained with best parameters")


Retraining final models on the complete training dataset with best parameters....
-Logistic Regression (Tuned) retrained with best parameters
-Random Forest (Tuned) retrained with best parameters
-XGBoost (Tuned) retrained with best parameters


## Final Model Evaluation on Hold-Out Test Set

In [15]:
fine_tuned_results= {}

print("Evaluating final models on the hold-out test set.....")
print("-"*50)

for name, model in best_models_tuned.items():
    y_pred= model.predict(X_test)
    y_proba= model.predict_proba(X_test)[:, 1]

    accuracy= accuracy_score(y_test, y_pred)
    f1= f1_score(y_test, y_pred, pos_label=1, zero_division=0)
    roc_auc= roc_auc_score(y_test, y_proba)

    fine_tuned_results[name]= {
        "Accuracy": accuracy,
        "F1-Score (Churn)": f1,
        "ROC-AUC": roc_auc
    }

    print(f"{name} Final Evaluation: ")
    print(f"    Accuracy: {accuracy:.4f}")
    print(f"    F1-Score (Churn): {f1:.4f}")
    print(f"    ROC-AUC: {roc_auc}")
    print("-"*50)


fine_tuned_df= pd.DataFrame(fine_tuned_results).T
print("Summary of Final Test Set Results: ")
display(fine_tuned_df.sort_values(by="F1-Score (Churn)", ascending=False).round(4))

Evaluating final models on the hold-out test set.....
--------------------------------------------------
Logistic Regression (Tuned) Final Evaluation: 
    Accuracy: 0.7271
    F1-Score (Churn): 0.6082
    ROC-AUC: 0.8357193885210513
--------------------------------------------------
Random Forest (Tuned) Final Evaluation: 
    Accuracy: 0.7527
    F1-Score (Churn): 0.6266
    ROC-AUC: 0.8351991240921256
--------------------------------------------------
XGBoost (Tuned) Final Evaluation: 
    Accuracy: 0.7392
    F1-Score (Churn): 0.6092
    ROC-AUC: 0.8346710945224698
--------------------------------------------------
Summary of Final Test Set Results: 


Unnamed: 0,Accuracy,F1-Score (Churn),ROC-AUC
Random Forest (Tuned),0.7527,0.6266,0.8352
XGBoost (Tuned),0.7392,0.6092,0.8347
Logistic Regression (Tuned),0.7271,0.6082,0.8357



---

### Key Observations
**Logistic Regression**
- Lowest accuracy among the three (0.7271).
- F1-Score for churners (0.6082) is close to **XGBoost** but below **Random Forest**.
- Achieved the highest ROC-AUC (0.8357) showing strong ranking ability despite lowest accuracy.

**Random Forest**
- Best overall performer with highest accuracy (0.7527) and F1-Score for churn (0.6266).
- ROC-AUC (0.8352) is compitetive and only slightly lower than **Logistic Regression**.
- Demostrates a good balance of precision and recall on the hold-out test set.

**XGBoost**
- Middle ground with accuracy (0.7392) higher than **Logistic Regression** but lowen than **Random Forest**.
- F1-Score (0.6092) nearly matches **Logistic Regression**, but below **Random Forest**.
- ROC-AUC (0.8347) is the lowest of the three but still very strong.


**Conclusion:** *Random Forest* is the top-performing model on the hold-out test set, achieving the best accuracy and F1-Score, with strong ROC-AUC. *XGBoost* is a close competitor, while *Logistic Regression*, though slightly better in ROC-AUC, lags behind in accuracy and F1-Score. 




## Save The Required Outputs

In [16]:
# Save the three final trained models
for name, model in best_models_tuned.items():
    joblib.dump(model, f'../models/{name}_model.pkl')
    print(f"{name} model saved to '../models/{name}_model.pkl'")

Logistic Regression (Tuned) model saved to '../models/Logistic Regression (Tuned)_model.pkl'
Random Forest (Tuned) model saved to '../models/Random Forest (Tuned)_model.pkl'
XGBoost (Tuned) model saved to '../models/XGBoost (Tuned)_model.pkl'


In [17]:
# Save the test results
fine_tuned_df.to_csv('../results/test_results.csv', index= True)