In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
from xgboost import XGBClassifier

# Read the already-encoded churn table from disk.
df = pd.read_csv("../data/encoded_churn.csv")

# "Churn" is the label column; every other column is used as a feature.
y = df["Churn"]
X = df.drop(columns="Churn")

# Hold out 20% of the rows for testing. Stratifying on y keeps the label
# proportions the same in both partitions; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    stratify=y,
    random_state=42,
)

# ----------------------------------------
# XGBoost classifier (hand-picked settings)
# ----------------------------------------
xgb_params = {
    # Learning task and the metric XGBoost tracks for it.
    "objective": "binary:logistic",
    "eval_metric": "logloss",
    # Number of boosting rounds and the step size applied to each one.
    "n_estimators": 300,
    "learning_rate": 0.05,
    # Per-tree depth limit plus row / column subsampling ratios.
    "max_depth": 5,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    # Fixed seed so the subsampling is repeatable.
    "random_state": 42,
}
xgb_model = XGBClassifier(**xgb_params)

# Fit on the training partition only; the test rows stay unseen.
xgb_model.fit(X_train, y_train)

# Score the held-out rows.
# Hard class labels feed the accuracy metric.
y_pred = xgb_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# ROC-AUC needs a continuous score, so take the second probability column
# (presumably the Churn = 1 class -- confirm against xgb_model.classes_).
y_prob = xgb_model.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, y_prob)

# Report both metrics under a banner.
print("XGBOOST MODEL PERFORMANCE\n")
print("Accuracy :", accuracy)
print("ROC-AUC  :", roc_auc)

# Persist the scores as a one-row table: each key becomes a column and each
# single-element list supplies that column's only value.
metrics = {"Model": ["XGBoost"], "Accuracy": [accuracy], "ROC_AUC": [roc_auc]}

# index=False keeps the DataFrame's row index out of the CSV.
metrics_df = pd.DataFrame(data=metrics)
metrics_df.to_csv("../data/xgboost_metrics.csv", index=False)

print("\nDAY 16 COMPLETED: XGBoost trained & metrics saved")


XGBOOST MODEL PERFORMANCE

Accuracy : 0.7863733144073811
ROC-AUC  : 0.8354852876592006

DAY 16 COMPLETED: XGBoost trained & metrics saved
