TASK 3: CUSTOMER CHURN PREDICTION

In [None]:
'''Develop a model to predict customer churn for a subscription
based service or business. Use historical customer data, including
features like usage behavior and customer demographics, and try
algorithms like Logistic Regression, Random Forests, or Gradient
Boosting to predict churn.'''

In [None]:
# importing required libraries and packages

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, roc_auc_score

In [None]:
# Load the dataset
data = pd.read_csv('Churn_Modelling.csv')

In [None]:
# Printing the first few rows from the dataset
print(data.head())

   RowNumber  CustomerId   Surname  CreditScore Geography  Gender  Age  \
0          1    15634602  Hargrave          619    France  Female   42   
1          2    15647311      Hill          608     Spain  Female   41   
2          3    15619304      Onio          502    France  Female   42   
3          4    15701354      Boni          699    France  Female   39   
4          5    15737888  Mitchell          850     Spain  Female   43   

   Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMember  \
0       2       0.00              1          1               1   
1       1   83807.86              1          0               1   
2       8  159660.80              3          1               0   
3       1       0.00              2          0               0   
4       2  125510.82              1          1               1   

   EstimatedSalary  Exited  
0        101348.88       1  
1        112542.58       0  
2        113931.57       1  
3         93826.63       0  
4         790

In [None]:
y = data['HasCrCard']
print(y.head())

0    1
1    0
2    1
3    0
4    1
Name: HasCrCard, dtype: int64


In [None]:
# Handling any missing values
data.fillna(0, inplace = True)

In [None]:
# Encode the categorical variables
data = pd.get_dummies(data, drop_first = True)

In [None]:
# Spliting the data into features and target variable
x = data.drop('HasCrCard', axis = 1)
y = data['HasCrCard']

In [None]:
# Split data into training and testing set
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

In [None]:
# Training the Random Forest Model
rf_model = RandomForestClassifier(n_estimators=50, random_state = 42)
rf_model.fit(x_train, y_train)

In [None]:
# Making predictions
rf_prediction = rf_model.predict(x_test)

In [None]:
# Evaluating the model - For Random Forest Classification
print("Random Forest Classification Report: \n", classification_report(y_test,rf_prediction))
print("Random Forest ROC-AUC Score: \n", roc_auc_score(y_test, rf_prediction))

Random Forest Classification Report: 
               precision    recall  f1-score   support

           0       0.28      0.01      0.03       573
           1       0.71      0.99      0.83      1427

    accuracy                           0.71      2000
   macro avg       0.49      0.50      0.43      2000
weighted avg       0.59      0.71      0.60      2000

Random Forest ROC-AUC Score: 
 0.4996227088890275


In [None]:
# Training for Logistic Regression Model
logistic = LogisticRegression(max_iter = 1000, random_state = 42)
logistic.fit(x_train, y_train)

In [None]:
# Making Predictions
logistic = logistic.predict(x_test)

In [None]:
# Evaluating the Logistic Regression Model
print("Logistic Regression Classification Report: \n", classification_report(y_test, logistic))
print("Logistic Regression ROC-AUC Score: ", roc_auc_score(y_test, logistic))

Logistic Regression Classification Report: 
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       573
           1       0.71      1.00      0.83      1427

    accuracy                           0.71      2000
   macro avg       0.36      0.50      0.42      2000
weighted avg       0.51      0.71      0.59      2000

Logistic Regression ROC-AUC Score:  0.5


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Training Gradient Boosting Model
gradient_model = GradientBoostingClassifier(n_estimators = 100, random_state = 42)
gradient_model.fit(x_train, y_train)

In [None]:
# Making predictions
gradient_prediction = gradient_model.predict(x_test)

In [None]:
# Evaluating the Gradient boosting model
print("Gradient Boosting Classifiction Report: \n", classification_report(y_test, gradient_prediction))
print("Gradient Boosting ROC-AUC Score: ", roc_auc_score(y_test, gradient_prediction))

Gradient Boosting Classifiction Report: 
               precision    recall  f1-score   support

           0       0.12      0.00      0.01       573
           1       0.71      0.99      0.83      1427

    accuracy                           0.71      2000
   macro avg       0.41      0.50      0.42      2000
weighted avg       0.54      0.71      0.59      2000

Gradient Boosting ROC-AUC Score:  0.4964894193385848
