<a href="https://colab.research.google.com/github/Jainrani/Customer-Churn-Prediction/blob/main/Customer_Churn_Prediction_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

CUSTOMER CHURN PREDICTION

Skills Learned:

Classification (Binary)

Encoding categorical variables

Train-Test split

Random Forest & Logistic Regression

Evaluation metrics (Accuracy, Precision, Recall, F1-score, Confusion Matrix)

Feature importance

Step 1: Import Libraries

In [None]:
# Data handling
import pandas as pd
import numpy as np

# ML models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Evaluation
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Model persistence
import joblib

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns


Step 2: Load Dataset

In [None]:
# Load the churn dataset into a DataFrame.
# NOTE(review): the path is absolute under /content (presumably the Colab
# working directory) - adjust it when running elsewhere.
data = pd.read_csv('/content/WA_Fn-UseC_-Telco-Customer-Churn.csv')

# Quick look at data: first rows, column summary, missing values per column
print(data.head())
# info() writes its report to stdout itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
data.info()
print(data.isnull().sum())




Step 3: Data Preprocessing

Handle missing values

Encode categorical variables

Drop unnecessary columns

In [None]:
# Drop customerID (not useful as a feature). errors='ignore' means re-running
# this cell after the column is already gone does not raise a KeyError.
data = data.drop(columns='customerID', errors='ignore')

# Convert 'TotalCharges' to numeric; entries that cannot be parsed (e.g. blank
# strings) become NaN because of errors='coerce'.
total_charges = pd.to_numeric(data['TotalCharges'], errors='coerce')

# Fill the resulting NaNs with the column median and assign the result back.
# Calling fillna(inplace=True) on a column selection is a chained in-place
# update: it is deprecated in pandas and leaves the DataFrame unchanged once
# Copy-on-Write is active, which would let NaNs reach the models.
data['TotalCharges'] = total_charges.fillna(total_charges.median())

# Label-encode every remaining text column (this includes the 'Churn' target,
# and multi-category columns as well as binary ones). A separate encoder is
# fitted per column and kept in `encoders`, so each integer mapping can be
# inverted or reapplied later.
encoders = {}
le = LabelEncoder()  # name kept for backward compatibility; rebound per column below
for col in data.columns:
    # Match classic object columns as well as pandas' dedicated string dtype,
    # for which a plain `dtype == 'object'` comparison is False.
    if pd.api.types.is_object_dtype(data[col]) or pd.api.types.is_string_dtype(data[col]):
        le = LabelEncoder()
        data[col] = le.fit_transform(data[col])
        encoders[col] = le


Step 4: Split Features and Target

In [None]:
# Separate the predictors from the 'Churn' target column
X = data.drop('Churn', axis=1)
y = data['Churn']

# Split into training and test set (20% held out for testing).
# stratify=y keeps the proportion of each Churn class the same in both splits,
# so the test metrics are not skewed by an unlucky shuffle of the classes.
# random_state fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


Step 5: Feature Scaling

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_test)
)


Step 6: Train Models
1: Logistic Regression

In [None]:
# Train logistic regression on the standardized features and predict the
# held-out test rows (fit() returns the fitted estimator itself).
lr_model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)
y_pred_lr = lr_model.predict(X_test_scaled)

# Compute the metrics first, then report them
lr_accuracy = accuracy_score(y_test, y_pred_lr)
lr_report = classification_report(y_test, y_pred_lr)

print("Logistic Regression Metrics:")
print("Accuracy:", lr_accuracy)
print(lr_report)


2: Random Forest Classifier

In [None]:
# Train a 100-tree random forest on the unscaled features (this model is fitted
# on X_train, not X_train_scaled) and predict the held-out test rows.
# random_state makes the fitted forest reproducible.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

# Compute the metrics first, then report them
rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_report = classification_report(y_test, y_pred_rf)

print("Random Forest Metrics:")
print("Accuracy:", rf_accuracy)
print(rf_report)


Step 7: Confusion Matrix Visualization (Random Forest)

In [None]:
# Confusion matrix of the random forest's test predictions
# (confusion_matrix puts actual classes on rows and predicted classes on columns)
cm = confusion_matrix(y_test, y_pred_rf)

# heatmap() returns the Axes it drew on; label that Axes directly instead of
# going through the pyplot "current axes" state.
cm_ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
cm_ax.set_xlabel('Predicted')
cm_ax.set_ylabel('Actual')
cm_ax.set_title('Confusion Matrix - Random Forest')
plt.show()


Step 8: Feature Importance

In [None]:
# Pair each importance score from the fitted forest with its feature name and
# order the features from most to least important.
importance_scores = rf_model.feature_importances_
feature_importance = pd.Series(importance_scores, index=X.columns).sort_values(ascending=False)

# Bar chart of the ranked importances
importance_ax = feature_importance.plot.bar(figsize=(12, 6))
importance_ax.set_title('Feature Importance - Random Forest')
plt.show()


In [None]:
# Persist the fitted random forest to a file in the current working directory.
# joblib is imported in the notebook's import cell at the top, so every
# dependency is declared in one place.
# NOTE(review): only the model is saved. It was trained on the label-encoded,
# unscaled columns of X, so any later inference has to reproduce that encoding
# and column order - the encoders themselves are not written here.
joblib.dump(rf_model, 'customer_churn_model.pkl')
