In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score


In [4]:
def _encode_yes_no(series):
    """Return `series` with "Yes"/"No" encoded as 1/0.

    Series.map turns every value that is missing from the mapping into NaN,
    so the mapping is applied only when the column is not numeric yet. A
    column that is already numeric is returned unchanged, which keeps this
    cell safe to re-run on data that was encoded earlier.

    Raises:
        ValueError: if a non-null value other than "Yes"/"No" is present,
            instead of silently converting it to NaN.
    """
    if pd.api.types.is_numeric_dtype(series):
        return series
    encoded = series.map({"Yes": 1, "No": 0})
    unexpected = series[encoded.isna() & series.notna()].unique()
    if len(unexpected) > 0:
        raise ValueError(
            f"Column {series.name!r} has values other than 'Yes'/'No': {list(unexpected)}"
        )
    return encoded


# Load the cleaned Telco churn data (path is relative to the kernel's working directory).
df = pd.read_csv("../data/Telco_customer_churn_cleaned.csv")

# Clean TotalCharges again (safety): unparseable entries become NaN and those rows are dropped.
# .copy() makes the filtered frame independent, so the column assignments below
# cannot emit SettingWithCopyWarning on pandas versions without copy-on-write.
df["Total Charges"] = pd.to_numeric(df["Total Charges"], errors="coerce")
df = df.dropna(subset=["Total Charges"]).copy()

# Binary target and binary feature: encode only if the values are still strings.
df["Churn Label"] = _encode_yes_no(df["Churn Label"])
df["Senior Citizen"] = _encode_yes_no(df["Senior Citizen"])

# Feature matrix and target used by the modelling cells.
X = df[["Tenure Months", "Monthly Charges", "Contract", "Payment Method", "Internet Service", "Senior Citizen"]]
y = df["Churn Label"]


In [5]:
# Train-test split: hold out 25% of the rows for evaluation. Stratifying on the
# target keeps the class proportions similar in both partitions, and the fixed
# seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)

In [6]:
# Build Interpretable Model Pipeline
# Column groups routed to the two branches of the ColumnTransformer.
numerical = ["Tenure Months", "Monthly Charges", "Senior Citizen"]
categorical = ["Contract", "Payment Method", "Internet Service"]

# Numeric columns are passed through as-is (no scaling), so their coefficients
# are per raw unit. Categorical columns are one-hot encoded with the first
# level of each dropped, which makes that level the reference category.
preprocessor = ColumnTransformer([
    ("num", "passthrough", numerical),
    ("cat", OneHotEncoder(drop="first"), categorical),
])

# Preprocessing and classifier are chained so both are fitted on the same data.
model = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", LogisticRegression(max_iter=1000)),
])


In [7]:
# Train the model and Evaluate

# Fit the preprocessing + logistic regression pipeline on the training split only.
model.fit(X_train, y_train)

# Score the held-out rows: the second predict_proba column (class 1) feeds the
# ROC AUC, while the hard labels feed the classification report.
y_proba = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)

print(classification_report(y_true=y_test, y_pred=y_pred))
print("ROC AUC:", roc_auc_score(y_true=y_test, y_score=y_proba))



              precision    recall  f1-score   support

           0       0.84      0.90      0.87      1291
           1       0.65      0.53      0.58       467

    accuracy                           0.80      1758
   macro avg       0.75      0.71      0.73      1758
weighted avg       0.79      0.80      0.79      1758

ROC AUC: 0.8370957228183254


In [8]:
# Show the dtype of each training feature. The columns listed in `numerical`
# are passed through to the classifier unchanged, so they must already be numeric.
X_train.dtypes

Tenure Months         int64
Monthly Charges     float64
Contract             object
Payment Method       object
Internet Service     object
Senior Citizen        int64
dtype: object

In [9]:
# Interpret the Model
# Pair every transformed feature name with its fitted logistic-regression weight.
coefficients = model.named_steps["classifier"].coef_[0]
feature_names = model.named_steps["preprocessor"].get_feature_names_out()

Importance = pd.DataFrame({"Feature": feature_names, "Coefficient": coefficients})
# Most positive coefficient first, most negative last.
Importance = Importance.sort_values(by="Coefficient", ascending=False)

print(Importance)

                                       Feature  Coefficient
8            cat__Internet Service_Fiber optic     0.953582
6         cat__Payment Method_Electronic check     0.468599
2                          num__Senior Citizen     0.357460
7             cat__Payment Method_Mailed check     0.064941
5  cat__Payment Method_Credit card (automatic)     0.008379
1                         num__Monthly Charges     0.003605
0                           num__Tenure Months    -0.030976
3                       cat__Contract_One year    -0.780130
9                     cat__Internet Service_No    -0.822652
4                       cat__Contract_Two year    -1.508796


Key Drivers of Churn
1. Customers on fiber optic plans are much more likely to churn; this is the strongest churn risk factor.
2. Paying by electronic check is also a strong risk factor: these customers are more likely to churn.
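3. Monthly charges are positively associated with churn risk. Month-to-month contracts also increase churn risk: month-to-month is the reference category, and both one-year and two-year contracts have strongly negative coefficients relative to it.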

Product Actions
Push long-term contracts early in the customer journey.
Incentivize customers to switch from electronic check to other payment methods.
Improve the fiber optic customer experience.
Focus retention efforts on low-tenure, high-monthly charge customers.