In [1]:
import os
import sys

# Make the project root (the parent of the current working directory)
# importable so the `src.*` packages used below resolve. The membership guard
# keeps the cell idempotent: re-running it no longer appends a duplicate
# entry to sys.path each time.
project_root = os.path.abspath("..")  # add project root
if project_root not in sys.path:
    sys.path.append(project_root)

# Churn Prediction

In [2]:
from src.etl.preprocess import (
    load_raw_data,
    clean_and_engineer,
    encode_features,
    split_and_save,
)
from src.models.train import (
    load_processed_data,
    train_model,
    evaluate_model,
    save_model,
)

# Walk through the ETL stages one at a time so each intermediate can be inspected.
df_raw = load_raw_data()
df_clean = clean_and_engineer(df_raw)

# Only the final expression of a cell is rendered automatically. These two
# previews sit mid-cell, so as bare expressions they were evaluated and then
# silently discarded; display() (provided by the IPython kernel) renders them.
display(df_clean.head())
display(df_clean["Churn"].value_counts(normalize=True))  # check class balance

# Split the cleaned frame into the feature matrix and the target vector,
# then show their dimensions as the cell's result.
X, y = encode_features(df_clean)
X.shape, y.shape


((7032, 33), (7032,))

In [3]:
from src.etl.preprocess import run_full_preprocess

# Run the complete preprocessing pipeline and unpack the train/test partitions.
X_train, X_test, y_train, y_test = run_full_preprocess()

# A bare mid-cell expression is never rendered, so the feature preview is
# shown explicitly with display() (provided by the IPython kernel).
display(X_train.head())

# Class counts of the training target; rendered as the cell's final expression.
y_train.value_counts()


Churn
0    4130
1    1495
Name: count, dtype: int64

In [4]:
# Sanity check: dimensions of the train and test feature matrices.
(X_train.shape, X_test.shape)

((5625, 33), (1407, 33))

In [5]:
# Fit the churn classifier on the training split via the project's training helper.
clf = train_model(X_train, y_train)

In [6]:
# Score the fitted model on the held-out test split; the helper prints its
# own report (AUC, classification report, confusion matrix).
evaluate_model(clf, X_test, y_test)


=== MODEL EVALUATION ===
AUC: 0.8224

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.90      0.86      1033
           1       0.64      0.50      0.56       374

    accuracy                           0.79      1407
   macro avg       0.74      0.70      0.71      1407
weighted avg       0.78      0.79      0.78      1407

Confusion Matrix:
[[928 105]
 [186 188]]


In [7]:
# Persist the fitted classifier; no path is passed, so the helper's default
# destination is used.
save_model(clf)


Model saved to: models/churn_model.joblib


## Evaluation

In [8]:
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix

# Probability at or above which a customer is labelled as churn (class 1).
DECISION_THRESHOLD = 0.5

# y_test: actual labels (0/1)
y_pred_proba = clf.predict_proba(X_test)[:, 1]  # predicted churn probabilities

# ROC AUC is threshold-independent, so it is computed from the raw probabilities.
roc_auc = roc_auc_score(y_test, y_pred_proba)
print("ROC AUC:", roc_auc)

# Hard 0/1 predictions for the threshold-dependent metrics below.
y_pred = (y_pred_proba >= DECISION_THRESHOLD).astype(int)

# sep="" stops print() from inserting its default separator space right after
# the "\n"; that stray space shifted the first line of each multi-line block
# one column to the right of the lines beneath it.
print("Classification Report:\n", classification_report(y_test, y_pred), sep="")

print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred), sep="")


ROC AUC: 0.822368523225536
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.90      0.86      1033
           1       0.64      0.51      0.57       374

    accuracy                           0.79      1407
   macro avg       0.74      0.70      0.71      1407
weighted avg       0.78      0.79      0.78      1407

Confusion Matrix:
 [[927 106]
 [185 189]]


# Uplift Modeling

In [10]:
from src.models.uplift import run_uplift_pipeline

# Run the project's uplift pipeline end to end and unpack its four outputs.
# NOTE(review): presumably `uplift` holds per-customer uplift scores,
# `p_t` / `p_c` the predicted outcome probabilities under treatment / control,
# and `metrics` a mapping of evaluation results — confirm against
# src/models/uplift.py.
# NOTE(review): the pipeline takes no arguments here, so any data loading,
# seeding and model saving happen inside it — verify that it is deterministic.
uplift, p_t, p_c, metrics = run_uplift_pipeline()

Uplift models saved.

Sample uplift scores:
[ 0.02304484  0.06159608 -0.0017718  -0.0327373   0.35448679  0.12608527
 -0.01656203 -0.05866108 -0.13353852  0.01521976]

Uplift Evaluation Metrics:
uplift_at_20_pct: 0.0236
qini_auc: -0.0286
