## Setup

In [5]:
import sys

# Put the parent directory on the module search path so the `src` package
# imported below can be resolved. The entry is relative, so it is interpreted
# against the kernel's working directory (assumes the notebook is run from a
# folder one level below the project root - TODO confirm).
# The membership guard keeps this cell idempotent: re-running it in a live
# kernel no longer stacks duplicate '..' entries onto sys.path.
if '..' not in sys.path:
    sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc, confusion_matrix
import mlflow

from src.config import RAW_DATA_PATH, RANDOM_SEED
from src.feature_engineering import ChurnFeatureEngineer
from src.train import ChurnModelTrainer

## Quick model comparison

In [6]:
# --- Data: read the raw Telco churn CSV and let the project trainer split it ---
raw_csv_path = RAW_DATA_PATH / "WA_Fn-UseC_-Telco-Customer-Churn.csv"
df = pd.read_csv(raw_csv_path)

trainer = ChurnModelTrainer()
(
    X_train, X_val, X_test,
    y_train, y_val, y_test,
    encoder,
) = trainer.prepare_data(df)

# --- Models: both trainers receive the same train/validation splits ---
fit_args = (X_train, X_val, y_train, y_val)
baseline_model, scaler, baseline_auc = trainer.train_baseline(*fit_args)
xgb_model, xgb_auc = trainer.train_xgboost(*fit_args)

# --- Report: each AUC, then XGBoost's change relative to the baseline in
# percent (a negative value means XGBoost scored below the baseline) ---
auc_change_pct = (xgb_auc - baseline_auc) / baseline_auc * 100
summary_lines = (
    f"Baseline AUC: {baseline_auc:.4f}",
    f"XGBoost AUC: {xgb_auc:.4f}",
    f"Improvement: {auc_change_pct:.1f}%",
)
for summary_line in summary_lines:
    print(summary_line)


INFO:src.train:Preparing data for training...
INFO:src.feature_engineering:Created 31 features
INFO:src.train:Train shape: (4225, 30), Val shape: (1409, 30), Test shape: (1409, 30)
INFO:src.train:Class distribution - Train: 26.53%, Val: 26.54%, Test: 26.54%
INFO:src.train:Training baseline model...
INFO:src.train:Baseline AUC: 0.8399
INFO:src.train:Training XGBoost model...
INFO:src.train:XGBoost AUC: 0.8321


Baseline AUC: 0.8399
XGBoost AUC: 0.8321
Improvement: -0.9%
