# Customer Churn Prediction

## Imports

In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

from xgboost import XGBClassifier

from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

warnings.filterwarnings("ignore")

## Exploration

In [None]:
# Read the churn dataset (path is relative to the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='Telco-Customer-Churn.csv')
# Last expression of the cell, so the notebook displays the first five rows.
df.head()

In [None]:
# Bar chart of how many rows carry each Churn label, with each bar annotated
# by its share of all rows.
churn_counts = df['Churn'].value_counts()
ax = sns.barplot(x=churn_counts.index, y=churn_counts.values)
ax.set_xlabel('Churn')
ax.set_ylabel('Count')
ax.set_title('Churn Value Counts')

total = len(df)
for bar in ax.patches:
    height = bar.get_height()
    # Bar heights come back as floats; truncate to the integer row count.
    count = int(height)
    share = 100 * count / total
    x_center = bar.get_x() + bar.get_width() / 2
    # Anchor the label at the top-centre of the bar, text sitting just above it.
    ax.annotate(f'{share:.1f}%', (x_center, height), ha='center', va='bottom')

plt.show()

## Data Preprocessing

In [None]:
# Clean the raw frame and turn every column into a numeric feature.

# A unique identifier carries no predictive signal, and if it stays in the
# frame get_dummies() below expands it into one indicator column per customer.
# NOTE(review): assumes the identifier column is named 'customerID', as in the
# public Telco churn CSV -- confirm against the actual file.
# errors='ignore' makes this a no-op when the column is absent or the cell is re-run.
df = df.drop(columns=['customerID'], errors='ignore')

# Values that cannot be parsed as numbers become NaN, then every numeric
# column has its NaNs replaced by that column's median.
# NOTE: the median is taken over the whole dataset, i.e. before the
# train/test split.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df = df.fillna(df.median(numeric_only=True))

# Encode the target as 1 ('Yes') / 0 ('No'). Skipped when the column is
# already numeric so that re-running the cell does not map 0/1 to NaN and
# silently drop every row.
if not pd.api.types.is_numeric_dtype(df['Churn']):
    df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})

# Discard rows whose label was missing or not 'Yes'/'No'. The explicit copy
# makes the column assignments below write to an independent frame instead of
# a slice of the original.
df = df[df['Churn'].notna()].copy()
# A NaN in the mapped column would have forced float dtype; restore integer labels.
df['Churn'] = df['Churn'].astype(int)

# Two-valued text columns are label-encoded to 0/1.
binary_cols = [
    col for col in df.columns
    if col != 'Churn' and df[col].dtype == 'object' and df[col].nunique() == 2
]
for col in binary_cols:
    df[col] = LabelEncoder().fit_transform(df[col])

# Remaining categorical columns are one-hot encoded; drop_first removes one
# level per column to avoid perfectly collinear indicators.
df = pd.get_dummies(df, drop_first=True)

## Train Test Split

In [None]:
# Separate the feature matrix from the target column.
X = df.drop(columns=['Churn'])
y = df['Churn']

# Hold out 20% of the rows for evaluation. stratify=y keeps the churn /
# no-churn proportion the same in the train and test sets, so the test
# metrics are not skewed by an unlucky draw of the minority class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

## Scale

In [None]:
# Fit the scaler on the training rows only, then apply the same fitted
# transformation to both splits so the test set never influences the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Model Training

Model comparison: Logistic Regression, XGBoost, and a simple Neural Network

In [None]:
# Train three classifiers on the scaled training data. For each one keep both
# the hard class predictions (*_preds) and the positive-class probabilities
# (*_probs) on the test set.

# Logistic Regression
lr_model = LogisticRegression(max_iter=1000, random_state=42)
lr_model.fit(X_train_scaled, y_train)
lr_preds = lr_model.predict(X_test_scaled)
lr_probs = lr_model.predict_proba(X_test_scaled)[:, 1]

# XGBoost
# `use_label_encoder` is deprecated and no longer used by current XGBoost
# releases, so it is not passed; the labels are already 0/1.
xgb_model = XGBClassifier(eval_metric='logloss', random_state=42)
xgb_model.fit(X_train_scaled, y_train)
xgb_preds = xgb_model.predict(X_test_scaled)
xgb_probs = xgb_model.predict_proba(X_test_scaled)[:, 1]

# Deep Learning
# Seed the Python, NumPy and TensorFlow random generators so weight
# initialisation and batch shuffling repeat across runs, matching the fixed
# random_state used by the two models above.
set_random_seed(42)

n_features = X_train_scaled.shape[1]
nn_model = Sequential([
    # Explicit Input layer instead of the deprecated `input_dim` argument on Dense.
    Input(shape=(n_features,)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    # Single sigmoid unit: output is the predicted churn probability.
    Dense(1, activation='sigmoid'),
])
nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
nn_model.fit(X_train_scaled, y_train, epochs=20, batch_size=32, verbose=0)
# predict() returns shape (n_samples, 1); flatten to a 1-D probability vector.
nn_probs = nn_model.predict(X_test_scaled).flatten()
nn_preds = (nn_probs > 0.5).astype(int)

## Model Evaluation

In [None]:
# Evaluation
# For each model: print the per-class precision/recall/F1 report, the ROC-AUC
# computed from the predicted probabilities, and draw the confusion matrix.
for name, preds, probs in [
    ("Logistic Regression", lr_preds, lr_probs),
    ("XGBoost", xgb_preds, xgb_probs),
    ("Neural Network", nn_preds, nn_probs),
]:
    print(f"{name}:\n", classification_report(y_test, preds))
    # ROC-AUC uses the probabilities rather than the thresholded predictions,
    # so it does not depend on the 0.5 cut-off.
    print(f"{name} ROC-AUC: {roc_auc_score(y_test, probs):.3f}")

    cm = confusion_matrix(y_test, preds)
    ax = sns.heatmap(cm, annot=True, fmt='d')
    # confusion_matrix puts true labels on rows and predicted labels on columns.
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    plt.title(f"{name} Confusion Matrix")
    plt.show()

## Prediction

In [None]:
# Score five hypothetical customers with the neural network: take the first
# five feature rows and overwrite MonthlyCharges with 100 and tenure with 2.
# assign() returns a new frame, so X itself is left untouched.
multi_customers = X.iloc[:5].assign(MonthlyCharges=100, tenure=2)

# Apply the scaler that was fitted on the training data before predicting.
scaled_multi = scaler.transform(multi_customers)
predictions = nn_model.predict(scaled_multi).flatten()

for customer_no, prob in enumerate(predictions, start=1):
    # Same 0.5 cut-off as used for the test-set predictions.
    label = 'Churn' if prob > 0.5 else 'No Churn'
    print(f"Customer {customer_no}: Churn Probability = {prob:.2%}, Predicted Class = {label}")