In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.preprocessing import StandardScaler


import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw export and derive the modelling frame without mutating in place,
# so re-running this cell always starts from the file on disk.
CHURN_LABELS = {"No Churn": 0, "Churn": 1}

telco_raw = pd.read_csv("telco_TS.csv")

telco = (
    telco_raw
    # The target is stored as next month's state; expose it under the name "Churn".
    .rename(columns={"StateNextMonth": "Churn"})
    # Non-numeric TotalCharges entries become NaN and are dropped on the next step.
    .assign(TotalCharges=lambda df: pd.to_numeric(df["TotalCharges"], errors="coerce"))
    .dropna(subset=["TotalCharges"])
    # Remove the identifier and the "State" column from the modelling frame.
    .drop(columns=["customerID", "State"])
)

# Series.map turns every label missing from the dict into NaN; fail loudly here
# instead of letting NaN targets surface later as an opaque estimator error.
churn_encoded = telco["Churn"].map(CHURN_LABELS)
if churn_encoded.isna().any():
    unexpected = telco.loc[churn_encoded.isna(), "Churn"].unique().tolist()
    raise ValueError(f"Unexpected or missing Churn labels: {unexpected}")
telco = telco.assign(Churn=churn_encoded)

# One-hot encode the remaining text columns; drop_first avoids a redundant
# indicator column per categorical feature.
cat_cols = telco.select_dtypes(include=["object"]).columns
df_encoded = pd.get_dummies(telco, columns=cat_cols, drop_first=True)

telco.head()


In [None]:
# Separate the predictor columns from the target column
target_col = "Churn"
y = df_encoded[target_col]
X = df_encoded.drop(columns=[target_col])

X.shape, y.shape


In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# balance comparable across both partitions; the fixed seed makes the split
# repeatable.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Learn the scaling statistics from the training rows only, then apply that
# same fitted transformation to both partitions.
scaler = StandardScaler().fit(X_train)

X_train_scaled, X_test_scaled = (
    scaler.transform(partition) for partition in (X_train, X_test)
)

In [None]:
# Fit a logistic-regression classifier on the standardized training features.
# max_iter=2000 gives the solver a generous iteration budget to converge.
model = LogisticRegression(max_iter=2000).fit(X_train_scaled, y_train)
model

In [None]:
# Predict class labels for the held-out test rows with the fitted model.
# NOTE(review): the evaluation cell that follows recomputes the same predictions.
y_pred = model.predict(X_test_scaled)

### Model Evaluation

In [None]:
# Score the model on the test set using its default predicted labels
y_pred = model.predict(X_test_scaled)

baseline_cm = confusion_matrix(y_test, y_pred)
baseline_report = classification_report(y_test, y_pred)

print(baseline_cm)
print(baseline_report)

### ROC-AUC

In [28]:
# roc_auc_score is imported in the notebook's top import cell.

# Column 1 of predict_proba is the predicted probability of class 1 (churn).
y_probs = model.predict_proba(X_test_scaled)[:, 1]

# ROC-AUC is computed from the probabilities, so it does not depend on any
# particular decision threshold.
roc_auc = roc_auc_score(y_test, y_probs)

roc_auc


0.7962318058375526

### Threshold Tuning

In [29]:
# Flag a customer as churn whenever the predicted churn probability reaches
# the custom cut-off, then report the resulting metrics.
threshold = 0.4
flagged_as_churn = y_probs >= threshold
y_pred_custom = flagged_as_churn.astype(int)

custom_cm = confusion_matrix(y_test, y_pred_custom)
custom_report = classification_report(y_test, y_pred_custom)

print(custom_cm)
print(custom_report)

[[642 234]
 [150 383]]
              precision    recall  f1-score   support

           0       0.81      0.73      0.77       876
           1       0.62      0.72      0.67       533

    accuracy                           0.73      1409
   macro avg       0.72      0.73      0.72      1409
weighted avg       0.74      0.73      0.73      1409



### Model Performance Summary

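- Baseline Logistic Regression (default 0.5 decision threshold) achieved **74% accuracy** and **0.80 ROC-AUC**
- Lowering the decision threshold to 0.4 improved **churn recall from 61% to 72%**, while overall accuracy dipped slightly to 73%
- This demonstrates a business-oriented optimization focused on customer retention: catching more likely churners is prioritized over a small loss in overall accuracy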


In [30]:
# Apply the chosen cut-off to the churn probabilities to get the final 0/1 labels
final_threshold = 0.4
meets_cutoff = y_probs >= final_threshold
final_predictions = meets_cutoff.astype(int)