In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
# Load the churn dataset; the relative path is resolved against the
# notebook's current working directory.
data = pd.read_csv('Churn_Modelling.csv')

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; kept as the cell's last expression so it renders as a table.
data.describe()

Unnamed: 0,RowNumber,CustomerId,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,5000.5,15690940.0,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037
std,2886.89568,71936.19,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769
min,1.0,15565700.0,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,2500.75,15628530.0,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0
50%,5000.5,15690740.0,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0
75%,7500.25,15753230.0,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0
max,10000.0,15815690.0,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0


In [4]:
# Count missing values per column before any preprocessing.
print(data.isna().sum())

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64


In [5]:
# Drop the RowNumber, CustomerId and Surname columns (they are not used as
# features), then one-hot encode the two categorical columns. drop_first=True
# omits one level per category so the dummy columns are not redundant.
data = (
    data
    .drop(columns=['RowNumber', 'CustomerId', 'Surname'])
    .pipe(pd.get_dummies, columns=['Geography', 'Gender'], drop_first=True)
)

In [6]:
# Separate the binary target column from the remaining feature columns.
y = data['Exited']
X = data.drop(columns=['Exited'])

In [7]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the split is not stratified on y. The target is imbalanced
# (mean of Exited is about 0.20), so consider stratify=y — confirm with owner.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [8]:
# Learn the scaling parameters from the training rows only, then apply the
# same transformation to both splits so no test-set statistics leak into
# the fit.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# Logistic regression with default settings: fit on the training split and
# predict labels for the hold-out split.
lr_model = LogisticRegression().fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)

In [10]:
# Random forest with default hyperparameters: fit on the training split and
# predict labels for the hold-out split.
# random_state is pinned because the forest is randomized (bootstrap samples
# and per-split feature subsets); without a seed every run yields a slightly
# different model and different metrics. Metrics recorded from an earlier
# unseeded run will not match exactly — re-run the evaluation cell.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

In [11]:
# Gradient boosting with default hyperparameters: fit on the training split
# and predict labels for the hold-out split.
# random_state is pinned so repeated runs build the same ensemble; the
# estimator uses it to seed each tree and the feature permutation tried at
# each split. Metrics recorded from an earlier unseeded run may not match
# exactly — re-run the evaluation cell.
gb_model = GradientBoostingClassifier(random_state=42)
gb_model.fit(X_train, y_train)
gb_pred = gb_model.predict(X_test)

In [12]:
# Hold-out evaluation of the logistic regression predictions: each heading
# is paired with the metric function that produces the value printed after it.
print("Logistic Regression:")
for heading, metric_fn in (
    ("\n Accuracy:", accuracy_score),
    ("\n Confusion Matrix:\n", confusion_matrix),
    ("\n Classification Report:\n", classification_report),
):
    print(heading, metric_fn(y_test, lr_pred))

Logistic Regression:

 Accuracy: 0.8113333333333334

 Confusion Matrix:
 [[2318   98]
 [ 468  116]]

 Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.96      0.89      2416
           1       0.54      0.20      0.29       584

    accuracy                           0.81      3000
   macro avg       0.69      0.58      0.59      3000
weighted avg       0.78      0.81      0.77      3000



In [13]:
# Hold-out evaluation of the random forest predictions: each heading is
# paired with the metric function that produces the value printed after it.
print("\nRandom Forest:")
for heading, metric_fn in (
    ("\n Accuracy:", accuracy_score),
    ("\n Confusion Matrix:\n", confusion_matrix),
    ("\n Classification Report:\n", classification_report),
):
    print(heading, metric_fn(y_test, rf_pred))


Random Forest:

 Accuracy: 0.8716666666666667

 Confusion Matrix:
 [[2343   73]
 [ 312  272]]

 Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.97      0.92      2416
           1       0.79      0.47      0.59       584

    accuracy                           0.87      3000
   macro avg       0.84      0.72      0.75      3000
weighted avg       0.86      0.87      0.86      3000



In [14]:
# Hold-out evaluation of the gradient boosting predictions: each heading is
# paired with the metric function that produces the value printed after it.
print("\nGradient Boosting:")
for heading, metric_fn in (
    ("\n Accuracy:", accuracy_score),
    ("\nConfusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
):
    print(heading, metric_fn(y_test, gb_pred))


Gradient Boosting:

 Accuracy: 0.871

Confusion Matrix:
 [[2330   86]
 [ 301  283]]

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.96      0.92      2416
           1       0.77      0.48      0.59       584

    accuracy                           0.87      3000
   macro avg       0.83      0.72      0.76      3000
weighted avg       0.86      0.87      0.86      3000

