In [23]:
import sys, os
sys.path.append(os.path.abspath(".."))

import pandas as pd

from src.models.evaluate import evaluate_at_threshold, compute_roc_auc
from src.models.train import load_model
from src.data.make_dataset import load_and_clean_data
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from src.features.build_features import get_feature_lists, build_preprocessor

In [24]:

# Read the churn dataset through the project's loader; the path is relative to
# the notebook's working directory.
df = load_and_clean_data("../data/raw/Telco-Customer-Churn.csv")

# Target is the "Churn" column; every other column is kept as a feature.
y = df["Churn"]
X = df.drop(columns=["Churn"])

# Hold out 20% of the rows for evaluation. The split is stratified on the
# target and uses a fixed seed so the same partition is produced on re-run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [25]:
# Load the persisted gradient-boosting model (path is relative to the
# notebook's working directory).
# NOTE(review): this model is scored on X_test from this notebook's split; that
# is only a fair hold-out if the model was trained with the same split settings
# (test_size=0.2, random_state=42, stratify=y) — TODO confirm in the training code.
gb_model = load_model("../models/gb_churn_model.joblib")

In [26]:

# Feature-name groups and the matching preprocessor come from the project's
# feature-building helpers.
num_features, cat_features, bin_features = get_feature_lists()
preprocessor = build_preprocessor(num_features, cat_features, bin_features)

# Logistic-regression pipeline: preprocessing followed by the classifier.
# max_iter is raised to 1000 (above scikit-learn's default of 100).
lr_model = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("model", LogisticRegression(max_iter=1000)),
    ]
)

# Fit on the training split only; as the last expression of the cell, the
# fitted pipeline is also what the notebook displays.
lr_model.fit(X_train, y_train)

In [27]:
# Threshold handed to evaluate_at_threshold for both models in this notebook.
# NOTE(review): presumably the predicted-probability cut-off for the positive
# (churn) class — confirm against src.models.evaluate.
threshold = 0.3

# Logistic regression on the held-out split: metrics at the threshold above,
# then the "auc" entry of the ROC result.
lr_results = evaluate_at_threshold(lr_model, X_test, y_test, threshold)
lr_auc = compute_roc_auc(lr_model, X_test, y_test)["auc"]

In [28]:
# Gradient boosting on the same held-out split and the same `threshold` as the
# logistic-regression evaluation, so the two result sets are comparable.
gb_results = evaluate_at_threshold(gb_model, X_test, y_test, threshold)
gb_auc = compute_roc_auc(gb_model, X_test, y_test)["auc"]


In [29]:
# Side-by-side summary, one row per model: recall for the class keyed "1" in
# each classification report, plus ROC AUC.
comparison_df = pd.DataFrame(
    {
        "model": ["Logistic Regression", "Gradient Boosting"],
        "recall": [
            lr_results["classification_report"]["1"]["recall"],
            gb_results["classification_report"]["1"]["recall"],
        ],
        "auc": [lr_auc, gb_auc],
    }
)

comparison_df

Unnamed: 0,model,recall,auc
0,Logistic Regression,0.76738,0.834476
1,Gradient Boosting,0.772727,0.836564
