# 📊 Customer Churn Prediction Project
This notebook covers:
- Data loading
- Preprocessing
- Exploratory Data Analysis
- Model training and evaluation
- Model saving using `joblib`


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score, roc_curve

# Apply seaborn's 'whitegrid' style globally so every plot below shares it.
sns.set(style='whitegrid')

## 1️⃣ Load Dataset

In [None]:
# Location of the raw churn CSV; adjust this path for your environment.
csv_path = 'Telco-Customer-Churn_dataset.csv'

# Read the file into a DataFrame and preview the first rows.
data = pd.read_csv(csv_path)
data.head()

## 2️⃣ Data Cleaning & Preprocessing

In [None]:
# Cleaning is written as one non-mutating chain:
#   1. Drop 'customerID' (not useful for prediction). errors='ignore' keeps
#      the cell from raising KeyError when it is re-run after the column has
#      already been removed.
#   2. Parse 'TotalCharges' as numeric; values that cannot be parsed become
#      NaN because of errors='coerce'.
#   3. Drop every row that contains a NaN.
# The trailing .copy() gives an independent frame, so the column assignments
# below do not trigger chained-assignment warnings.
data = (
    data
    .drop(columns='customerID', errors='ignore')
    .assign(TotalCharges=lambda df: pd.to_numeric(df['TotalCharges'], errors='coerce'))
    .dropna()
    .copy()
)

# Label-encode every object-dtype column.
# A separate LabelEncoder is fitted per column and stored in `label_encoders`
# (keyed by column name). Re-fitting a single shared encoder would keep only
# the mapping of the last column, making it impossible to decode predictions
# or to encode new raw data the same way later on.
# NOTE: if this cell is re-run without reloading the CSV, no object columns
# remain and `label_encoders` comes back empty — re-run the load cell first.
cat_cols = data.select_dtypes(include='object').columns
label_encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

data.head()

## 3️⃣ Exploratory Data Analysis (EDA)

In [None]:
# Summary statistics of the preprocessed frame, shown as the cell output.
summary_stats = data.describe()
summary_stats

In [None]:
# Bar chart of how many rows fall into each 'Churn' value.
fig, ax = plt.subplots(figsize=(5, 4))
sns.countplot(data=data, x='Churn', ax=ax)
ax.set_title('Churn Distribution')
plt.show()

In [None]:
# Pairwise correlations between all columns, drawn as a heatmap
# (cell annotations are switched off).
corr_matrix = data.corr()

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(corr_matrix, annot=False, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()

## 4️⃣ Feature and Target Split

In [None]:
# Target: the 'Churn' column. Features: every other column of `data`.
y = data['Churn']
X = data.drop(columns=['Churn'])

## 5️⃣ Train-Test Split

In [None]:
# Hold out 20% of the rows for testing. The split is seeded (random_state=42)
# and stratified on y so both parts keep the same class proportions.
split = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split

## 6️⃣ Model Building - Random Forest

In [None]:
# Fit a Random Forest with default hyper-parameters and a fixed seed.
# `fit` returns the estimator itself, so construction and training are chained.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Second column of predict_proba — presumably the probability of the churn
# class (encoded as 1); confirm against `model.classes_`.
y_pred_prob = model.predict_proba(X_test)[:, 1]

# Hard class predictions for the held-out set.
y_pred = model.predict(X_test)

## 7️⃣ Model Evaluation

In [None]:
# Compute the metrics first, then print them in the same order as before.
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_prob)

print(report)
print("Accuracy:", accuracy)
print("ROC-AUC Score:", roc_auc)

In [None]:
# Confusion matrix of true vs. predicted labels on the test set,
# rendered as an annotated heatmap with integer cell counts.
conf_matrix = confusion_matrix(y_test, y_pred)

ax = sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

In [None]:
# ROC curve built from the predicted probabilities, with the AUC in the legend.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
auc_value = roc_auc_score(y_test, y_pred_prob)

fig, ax = plt.subplots(figsize=(6, 5))
ax.plot(fpr, tpr, label=f'AUC = {auc_value:.2f}')
# Dashed diagonal: reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend()
plt.show()

## 8️⃣ Feature Importance

In [None]:
# Rank the features by the fitted forest's importance scores, highest first.
features = X.columns
importances = model.feature_importances_
indices = importances.argsort()[::-1]

# Horizontal bars: importance on the x-axis, feature name on the y-axis.
# NOTE(review): newer seaborn releases may warn about `palette` being passed
# without `hue` — confirm against the installed seaborn version.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=importances[indices], y=features[indices], palette='viridis', ax=ax)
ax.set_title('Feature Importances')
plt.show()

## 9️⃣ Save the Trained Model

In [None]:
# Serialize the fitted model to disk with joblib, then confirm on stdout.
model_file = 'customer_churn_model.pkl'
joblib.dump(model, model_file)
print("Model saved as 'customer_churn_model.pkl'")

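✅ **This notebook covers the full churn workflow: data cleaning, exploratory analysis, Random Forest training and evaluation, and saving the fitted model to `customer_churn_model.pkl`. Before the saved model is used on new data, that data must go through the same preprocessing steps (column drop, numeric conversion, label encoding) applied here.**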