In [12]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    confusion_matrix, classification_report,
    roc_auc_score, roc_curve
)
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [3]:
# Load the churn modelling table from a CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer="final_churn_model_dataset.csv")


In [4]:
# Separate the "Churn" target from the remaining feature columns.
y = data["Churn"]
X = data.drop(columns="Churn")


In [5]:
# Hold out 20% of the rows as a test set. The split is stratified on y and
# uses a fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [10]:
# Base estimator used by the hyper-parameter search.
# class_weight="balanced" weights the classes instead of treating them equally;
# max_iter is raised to 5000 to help the solver converge.
model = LogisticRegression(class_weight="balanced", max_iter=5000, random_state=42)

In [15]:
# NOTE(review): best_model and X_test_scaled are assigned by the grid-search
# cell that appears later in this notebook, so this cell raises NameError on a
# fresh top-to-bottom run. It also repeats the evaluation performed by the
# cells that follow the grid search.

# Score the scaled test set: class-1 probabilities and hard labels.
y_pred_proba = best_model.predict_proba(X_test_scaled)[:, 1]
y_pred = best_model.predict(X_test_scaled)

# Report ROC AUC, then the per-class metrics and the confusion matrix.
print("ROC AUC Score:", roc_auc_score(y_test, y_pred_proba))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

ROC AUC Score: 0.45359717748120876

Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.49      0.60       159
           1       0.20      0.49      0.28        41

    accuracy                           0.49       200
   macro avg       0.49      0.49      0.44       200
weighted avg       0.67      0.49      0.54       200


Confusion Matrix:
 [[78 81]
 [21 20]]


In [7]:
# Five stratified folds, shuffled with a fixed seed so the folds are repeatable.
cv = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)


In [14]:
# Hyper-parameter search for the logistic regression.
#
# The scaler is placed inside a Pipeline so that, during cross-validation, it
# is re-fitted on the training part of every fold. Scaling the whole training
# set up front lets each validation fold influence the scaling statistics,
# which leaks information into the CV scores used to choose C.

# Select only numeric columns from X_train and X_test to avoid type errors.
# NOTE(review): non-numeric features are dropped by this selection - confirm
# that is intended, or encode those columns instead.
X_train_numeric = X_train.select_dtypes(include=np.number)
X_test_numeric = X_test.select_dtypes(include=np.number)

pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", model),
])

# Grid keys carry the "<step>__" prefix so they reach the classifier step.
param_grid = {
    "clf__C": [0.01, 0.1, 1, 10],
    "clf__penalty": ["l2"],
}

grid = GridSearchCV(
    pipeline,
    param_grid,
    cv=cv,
    scoring="roc_auc",
    n_jobs=-1,
)
grid.fit(X_train_numeric, y_train)

# GridSearchCV refits the best pipeline on the full training set by default.
# Expose its two fitted steps under the names the evaluation cells use:
# `best_model` is the classifier alone and expects already-scaled input.
scaler = grid.best_estimator_.named_steps["scaler"]
best_model = grid.best_estimator_.named_steps["clf"]
X_train_scaled = scaler.transform(X_train_numeric)
X_test_scaled = scaler.transform(X_test_numeric)

In [17]:
# Test-set outputs of the selected model: class-1 probabilities and hard labels.
y_prob = best_model.predict_proba(X_test_scaled)[:, 1]
y_pred = best_model.predict(X_test_scaled)

In [18]:
# Confusion matrix of the test-set predictions, displayed as the cell output.
confusion_matrix(y_true=y_test, y_pred=y_pred)


array([[78, 81],
       [21, 20]])

In [19]:
# Per-class precision, recall and F1 for the test-set predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))


              precision    recall  f1-score   support

           0       0.79      0.49      0.60       159
           1       0.20      0.49      0.28        41

    accuracy                           0.49       200
   macro avg       0.49      0.49      0.44       200
weighted avg       0.67      0.49      0.54       200



In [20]:
# ROC AUC on the test set, computed from the class-1 probabilities.
roc_auc_score(y_true=y_test, y_score=y_prob)


np.float64(0.45359717748120876)