In [None]:
# Install & Import
# !pip install xgboost 

import json
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

In [None]:
# Load Data
# Read the processed churn table, then separate the "Churn" target from the features.
df = pd.read_csv("../data/processed/clean_telco_churn.csv")

target_column = "Churn"
y = df[target_column]
X = df.drop(columns=[target_column])

# Hold out 20% of the rows; stratify on the target and fix the seed so the
# split is the same on every run.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Train Baseline XGBoost
# Count each label once; the lookups below use the keys 0 and 1.
# NOTE(review): presumably 0 = no churn and 1 = churn — confirm against the cleaning step.
class_counts = y_train.value_counts()

xgb_params = {
    "n_estimators": 200,
    "max_depth": 4,
    "learning_rate": 0.05,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    # Ratio of label-0 rows to label-1 rows in the training split.
    "scale_pos_weight": class_counts[0] / class_counts[1],
    "eval_metric": "logloss",
    "random_state": 42,
}

xgb = XGBClassifier(**xgb_params)

xgb.fit(X_train, y_train)

In [None]:
# Evaluate XGBoost
# ROC AUC is scored on the second probability column; the remaining metrics
# are scored on the hard predictions from `predict`.
y_prob = xgb.predict_proba(X_test)[:, 1]
y_pred = xgb.predict(X_test)

label_metrics = {
    "precision": precision_score,
    "recall": recall_score,
    "f1_score": f1_score,
}

xgb_results = {
    "model_name": "XGBoost",
    "roc_auc": roc_auc_score(y_test, y_prob),
}
xgb_results.update(
    {name: metric(y_test, y_pred) for name, metric in label_metrics.items()}
)

xgb_results

In [None]:
# Save Results to JSON
# `json` and `Path` are imported in the notebook's top import cell.
results_path = Path("../results/xgboost.json")

# Create the output directory when it is missing so that a run on a fresh
# checkout does not stop with FileNotFoundError.
results_path.parent.mkdir(parents=True, exist_ok=True)

with results_path.open("w") as f:
    json.dump(xgb_results, f, indent=4)

In [None]:
# Compare Against Final Model
# Rebuild the list of result files from disk on every run instead of appending
# to a `files` list that this notebook never defines: the cell now works after
# Restart & Run All and does not add duplicate entries when re-executed.
# NOTE(review): assumes every model notebook writes its metrics as a JSON file
# into ../results with the same keys as xgboost.json — confirm.
files = sorted(str(path) for path in Path("../results").glob("*.json"))
if not files:
    raise FileNotFoundError("No result files found in ../results")

# Read each file inside a `with` block so its handle is closed right away.
records = []
for result_file in files:
    with open(result_file) as handle:
        records.append(json.load(handle))

comparison_df = pd.DataFrame(records)

# Highest recall first.
comparison_df.sort_values(by="recall", ascending=False)


# If XGBoost:
# ❌ Improves recall by < 2% → KEEP Random Forest
# ✅ Improves recall by ≥ 2–3% → XGBoost becomes final model

## 🚀 XGBoost Evaluation

XGBoost was evaluated as an advanced model to determine whether boosting could improve churn detection.

While XGBoost demonstrated strong performance, the improvement over the tuned Random Forest was marginal. Given the increased complexity and reduced interpretability, the tuned Random Forest remains the final selected model.