Churn Prediction

In [43]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the data
data = pd.read_csv('Datasets/Bank_Modelling.csv')
data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [44]:
# feature selection
X = data.iloc[:, :10].values
Y = data.iloc[:, 10].values

In [45]:
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [46]:
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])

# Apply OneHotEncoder to the first column (index 1)
ct = ColumnTransformer(
	transformers=[('Geography', OneHotEncoder(), [1])], remainder='passthrough')
X = ct.fit_transform(X)
X.astype(int)

array([[     1,      0,      0, ...,      1,      1, 101348],
       [     0,      0,      1, ...,      0,      1, 112542],
       [     1,      0,      0, ...,      1,      0, 113931],
       ...,
       [     1,      0,      0, ...,      0,      1,  42085],
       [     0,      1,      0, ...,      1,      0,  92888],
       [     1,      0,      0, ...,      1,      0,  38190]])

In [47]:
X = X[:, 1:]
X

array([[0.0, 0.0, 619, ..., 1, 1, 101348.88],
       [0.0, 1.0, 608, ..., 0, 1, 112542.58],
       [0.0, 0.0, 502, ..., 1, 0, 113931.57],
       ...,
       [0.0, 0.0, 709, ..., 0, 1, 42085.58],
       [1.0, 0.0, 772, ..., 1, 0, 92888.52],
       [0.0, 0.0, 792, ..., 1, 0, 38190.78]], dtype=object)

In [48]:
# splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

In [49]:
x_train

array([[0.0, 1.0, 667, ..., 1, 0, 163830.64],
       [1.0, 0.0, 427, ..., 1, 1, 57098.0],
       [0.0, 0.0, 535, ..., 1, 0, 185630.76],
       ...,
       [0.0, 0.0, 738, ..., 1, 0, 181429.87],
       [0.0, 1.0, 590, ..., 1, 1, 148750.16],
       [1.0, 0.0, 623, ..., 1, 0, 118855.26]], dtype=object)

In [50]:
x_test

array([[1.0, 0.0, 597, ..., 1, 1, 192852.67],
       [0.0, 0.0, 523, ..., 1, 0, 128702.1],
       [0.0, 1.0, 706, ..., 1, 1, 75732.25],
       ...,
       [0.0, 1.0, 578, ..., 1, 0, 141533.19],
       [1.0, 0.0, 650, ..., 1, 1, 11276.48],
       [1.0, 0.0, 573, ..., 1, 0, 192950.6]], dtype=object)

In [51]:
# feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [52]:
x_train

array([[-0.5698444 ,  1.74309049,  0.16958176, ...,  0.64259497,
        -1.03227043,  1.10643166],
       [ 1.75486502, -0.57369368, -2.30455945, ...,  0.64259497,
         0.9687384 , -0.74866447],
       [-0.5698444 , -0.57369368, -1.19119591, ...,  0.64259497,
        -1.03227043,  1.48533467],
       ...,
       [-0.5698444 , -0.57369368,  0.9015152 , ...,  0.64259497,
        -1.03227043,  1.41231994],
       [-0.5698444 ,  1.74309049, -0.62420521, ...,  0.64259497,
         0.9687384 ,  0.84432121],
       [ 1.75486502, -0.57369368, -0.28401079, ...,  0.64259497,
        -1.03227043,  0.32472465]])

In [53]:
x_test

array([[ 1.75486502, -0.57369368, -0.55204276, ...,  0.64259497,
         0.9687384 ,  1.61085707],
       [-0.5698444 , -0.57369368, -1.31490297, ...,  0.64259497,
        -1.03227043,  0.49587037],
       [-0.5698444 ,  1.74309049,  0.57162971, ...,  0.64259497,
         0.9687384 , -0.42478674],
       ...,
       [-0.5698444 ,  1.74309049, -0.74791227, ...,  0.64259497,
        -1.03227043,  0.71888467],
       [ 1.75486502, -0.57369368, -0.00566991, ...,  0.64259497,
         0.9687384 , -1.54507805],
       [ 1.75486502, -0.57369368, -0.79945688, ...,  0.64259497,
        -1.03227043,  1.61255917]])

In [55]:
#ANN
import keras
from keras.models import Sequential
from keras.layers import Dense

In [56]:
classifier = Sequential()
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11))
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
classifier.fit(x_train, y_train, batch_size=10, epochs=100)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 671us/step - accuracy: 0.8048 - loss: 0.5576
Epoch 2/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 598us/step - accuracy: 0.7917 - loss: 0.4340
Epoch 3/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 532us/step - accuracy: 0.7928 - loss: 0.4339
Epoch 4/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 541us/step - accuracy: 0.8175 - loss: 0.4265
Epoch 5/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 563us/step - accuracy: 0.8266 - loss: 0.4185
Epoch 6/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 545us/step - accuracy: 0.8312 - loss: 0.4162
Epoch 7/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 536us/step - accuracy: 0.8226 - loss: 0.4272
Epoch 8/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 572us/step - accuracy: 0.8349 - loss: 0.4087
Epoch 9/100
[1m

<keras.src.callbacks.history.History at 0x25c6c928a90>

In [69]:
classifier1 = Sequential()
classifier1.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11))
classifier1.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))
classifier1.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
classifier1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
classifier1.fit(x_train, y_train, batch_size=8, epochs=200)

Epoch 1/200


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 585us/step - accuracy: 0.7911 - loss: 0.5619
Epoch 2/200
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 559us/step - accuracy: 0.7965 - loss: 0.4236
Epoch 3/200
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 553us/step - accuracy: 0.8252 - loss: 0.4155
Epoch 4/200
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 546us/step - accuracy: 0.8322 - loss: 0.4089
Epoch 5/200
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 549us/step - accuracy: 0.8307 - loss: 0.4167
Epoch 6/200
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 551us/step - accuracy: 0.8358 - loss: 0.4044
Epoch 7/200
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 557us/step - accuracy: 0.8370 - loss: 0.3999
Epoch 8/200
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 557us/step - accuracy: 0.8271 - loss: 0.4094
Epoch 9/200


<keras.src.callbacks.history.History at 0x25c6ce79f90>

In [58]:
classifier.summary()

In [61]:
# Predicting the Test set results
y_pred = classifier.predict(x_test)
y_pred

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 552us/step


array([[0.20848496],
       [0.35824215],
       [0.15955624],
       ...,
       [0.17064899],
       [0.1876711 ],
       [0.11044502]], dtype=float32)

In [62]:
y_pred = (y_pred > 0.5)
y_pred

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [63]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[1546,   49],
       [ 264,  141]])

In [60]:
classifier.evaluate(x_test, y_test)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 919us/step - accuracy: 0.8421 - loss: 0.3851


[0.39618316292762756, 0.843500018119812]

In [70]:
classifier1.summary()

In [71]:
# Predicting the Test set results
y_pred = classifier1.predict(x_test)
y_pred

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 855us/step


array([[0.19940533],
       [0.3204929 ],
       [0.10843613],
       ...,
       [0.20113608],
       [0.10172099],
       [0.21279635]], dtype=float32)

In [72]:
y_pred = (y_pred > 0.5)
y_pred

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [73]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[1519,   76],
       [ 203,  202]])

In [75]:
classifier1.evaluate(x_test, y_test)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 919us/step - accuracy: 0.8617 - loss: 0.3252


[0.33596110343933105, 0.8604999780654907]