## Customer Churn Prediction

In [1]:
#Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import pickle

In [2]:
#Load Dataset
# Path is relative to the notebook's working directory.
csv_path = 'Customer_churn.csv'
df = pd.read_csv(csv_path)

In [3]:
#Data cleaning
# Rebind `df` instead of mutating it in place: with inplace=True a second run
# of this cell raised KeyError because 'customerID' had already been removed.
# errors='ignore' makes the drop a no-op when the column is already gone.
df = df.drop(columns=['customerID'], errors='ignore')
# Entries that cannot be parsed as numbers are coerced to NaN ...
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
# ... and removed here, together with any other row that has a missing value.
df = df.dropna()

In [4]:
#Convert target column
# Fit the encoder on the Churn labels first, then replace the column with the
# integer codes; `le` stays available for mapping codes back to labels.
le = LabelEncoder()
le.fit(df['Churn'])
df['Churn'] = le.transform(df['Churn'])

In [5]:
#Encode categorical variables
# One-hot encode every non-target column; drop_first=True drops the first
# level of each categorical so the dummy columns are not redundant.
df_encoded = pd.get_dummies(df.drop(columns='Churn'), drop_first=True)
X, y = df_encoded, df['Churn']

In [6]:
# Train-test split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable. Note the split is not stratified on y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
#Model Training & Evaluation

def evaluate_model(model, name, data=None):
    """Fit ``model``, score it on a held-out split and print a short report.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit``, ``predict`` and ``predict_proba``. It is
        fitted in place.
    name : str
        Label used in the printed report header.
    data : tuple, optional
        ``(X_train, X_test, y_train, y_test)`` to train and evaluate on.
        When omitted, the notebook-level variables with those names are used,
        which keeps existing ``evaluate_model(model, name)`` calls working.

    Returns
    -------
    tuple
        ``(model, acc, roc)``: the fitted model, its test accuracy and its
        test ROC-AUC.
    """
    if data is None:
        # Fall back to the split created by the train/test split cell.
        data = (X_train, X_test, y_train, y_test)
    train_X, test_X, train_y, test_y = data

    model.fit(train_X, train_y)
    y_pred = model.predict(test_X)
    # Column 1 of predict_proba is the score for the second class, which
    # roc_auc_score uses as the positive-class score.
    y_proba = model.predict_proba(test_X)[:, 1]

    acc = accuracy_score(test_y, y_pred)
    roc = roc_auc_score(test_y, y_proba)

    print(f"\n{name} Results:")
    print(f"Accuracy: {acc:.2f}")
    print(f"ROC-AUC: {roc:.2f}")
    print(confusion_matrix(test_y, y_pred))
    print(classification_report(test_y, y_pred))
    return model, acc, roc

In [9]:
#Logistic Regression
# max_iter is raised to 1000 iterations for the solver.
log_model, log_acc, log_roc = evaluate_model(
    LogisticRegression(max_iter=1000),
    "Logistic Regression",
)


Logistic Regression Results:
Accuracy: 0.79
ROC-AUC: 0.83
[[917 116]
 [186 188]]
              precision    recall  f1-score   support

           0       0.83      0.89      0.86      1033
           1       0.62      0.50      0.55       374

    accuracy                           0.79      1407
   macro avg       0.72      0.70      0.71      1407
weighted avg       0.77      0.79      0.78      1407



In [10]:
#Random Forest
# Seed the forest so its metrics are reproducible on Restart & Run All
# (same seed as the train/test split).
rf_model, rf_acc, rf_roc = evaluate_model(RandomForestClassifier(random_state=42), "Random Forest")


Random Forest Results:
Accuracy: 0.78
ROC-AUC: 0.81
[[921 112]
 [199 175]]
              precision    recall  f1-score   support

           0       0.82      0.89      0.86      1033
           1       0.61      0.47      0.53       374

    accuracy                           0.78      1407
   macro avg       0.72      0.68      0.69      1407
weighted avg       0.77      0.78      0.77      1407



In [11]:
#XGBoost
# `use_label_encoder` is not used by the installed XGBoost (it only produced a
# 'Parameters: { "use_label_encoder" } are not used.' warning), so it is omitted.
xgb_model, xgb_acc, xgb_roc = evaluate_model(XGBClassifier(eval_metric='logloss'), "XGBoost")

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



XGBoost Results:
Accuracy: 0.77
ROC-AUC: 0.81
[[903 130]
 [188 186]]
              precision    recall  f1-score   support

           0       0.83      0.87      0.85      1033
           1       0.59      0.50      0.54       374

    accuracy                           0.77      1407
   macro avg       0.71      0.69      0.69      1407
weighted avg       0.76      0.77      0.77      1407

