In [61]:
# Importing Libraries

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt

In [63]:
# Load data
# Read the Telco churn CSV; the path is relative to the notebook's working directory.
csv_path = 'Telco-Customer-Churn.csv'
df = pd.read_csv(csv_path)

# Preview the first rows as a quick check of the loaded columns.
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [64]:
# Data cleaning
#
# 1. Parse TotalCharges as numbers; entries that cannot be parsed become NaN
#    (errors='coerce').
# 2. Drop every row that contains a missing value.
# 3. Remove the customerID column when it is present; errors='ignore' keeps
#    this cell safe to re-run once the column is already gone.
#
# The steps are chained so that each one returns a new frame instead of
# mutating the previously loaded frame in place (no inplace=True).
df = (
    df.assign(TotalCharges=pd.to_numeric(df['TotalCharges'], errors='coerce'))
    .dropna()
    .drop(columns=['customerID'], errors='ignore')
)

In [65]:
# One-hot encoding
# Expand the non-numeric columns into indicator columns. drop_first=True
# omits the first level of every encoded column; the splitting cell reads the
# target from the resulting 'Churn_Yes' column.
df = pd.get_dummies(data=df, drop_first=True)

In [66]:
# Train / test split
# 'Churn_Yes' is the target; every other column of df is used as a feature.
target_col = 'Churn_Yes'
y = df[target_col]
X = df.drop(columns=[target_col])

# Hold out 20% of the rows for testing, stratified on the target, with a
# fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

In [67]:
# SMOTE oversampling
# Only the training split is resampled; X_test / y_test are not touched here.
# NOTE(review): the features include the indicator columns produced by
# get_dummies, and SMOTE builds synthetic rows by interpolating between
# neighbours -- presumably SMOTENC would suit the categorical columns better;
# confirm before relying on the resampled rows.
smote = SMOTE(random_state=42)
resampled_X, resampled_y = smote.fit_resample(X_train, y_train)
X_train, y_train = resampled_X, resampled_y

In [68]:
# Model training
# No hyperparameter search runs in this cell: the random forest is fit once
# with the fixed settings listed below.
# NOTE(review): class_weight='balanced' is applied on top of the SMOTE-resampled
# training data from the previous cell -- presumably redundant; confirm.
rf_params = dict(
    n_estimators=400,
    max_depth=25,
    min_samples_split=5,
    min_samples_leaf=2,
    max_features='sqrt',
    class_weight='balanced',
    random_state=42,
    n_jobs=-1,
)
rf = RandomForestClassifier(**rf_params)

# fit() returns the estimator itself, so best_model and rf are the same object.
best_model = rf.fit(X_train, y_train)

In [69]:
# Prediction
# Score the held-out rows and keep column index 1 of predict_proba
# (the second entry of best_model.classes_) as the churn score.
class_scores = best_model.predict_proba(X_test)
y_probs = class_scores[:, 1]

# Label a row 1 when its score is at or above the 0.5 cutoff, otherwise 0.
above_cutoff = y_probs >= 0.5
y_pred = above_cutoff.astype(int)

In [70]:
# Accuracy evaluation
# ROC AUC is computed from the scores (y_probs); accuracy and the reports
# below are computed from the thresholded labels (y_pred).
roc_auc = roc_auc_score(y_test, y_probs)
acc = accuracy_score(y_test, y_pred)

# Print the two scalar metrics first, then the per-class report and the
# confusion matrix.
for label, value in (("ROC AUC Score:", roc_auc), ("Accuracy:", acc)):
    print(label, value)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

ROC AUC Score: 0.8293985639666409
Accuracy: 0.7846481876332623
              precision    recall  f1-score   support

       False       0.87      0.83      0.85      1033
        True       0.59      0.65      0.61       374

    accuracy                           0.78      1407
   macro avg       0.73      0.74      0.73      1407
weighted avg       0.79      0.78      0.79      1407

[[862 171]
 [132 242]]


In [71]:
# Evaluation
# Both names are already imported in the first cell of the notebook; the
# import is repeated here so this cell also runs on its own.
from sklearn.metrics import classification_report, confusion_matrix

# NOTE(review): the heading below says "Optimized for Churn Recall", but
# y_pred is whatever the Prediction cell produced (a 0.5 cutoff there), so
# this prints the same report as the previous cell -- confirm whether a
# recall-oriented threshold was intended.
report_text = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

print("Classification Report (Optimized for Churn Recall):")
print(report_text)
print("Confusion Matrix:")
print(matrix)

Classification Report (Optimized for Churn Recall):
              precision    recall  f1-score   support

       False       0.87      0.83      0.85      1033
        True       0.59      0.65      0.61       374

    accuracy                           0.78      1407
   macro avg       0.73      0.74      0.73      1407
weighted avg       0.79      0.78      0.79      1407

Confusion Matrix:
[[862 171]
 [132 242]]
