# **Predicting Bank Customer Churn using ANN**

# Problem Statement

The bank seeks to develop a predictive model to prevent customer churn. The objective is to accurately predict which customers are at risk of churning in the near future. By identifying potential churners early, the bank can implement targeted retention strategies to mitigate attrition and enhance customer satisfaction.

# Importing the dependencies

In [2]:
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns


from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense

In [4]:
# Read the churn dataset into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='/content/Churn_Modelling.csv')
df.head(5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [5]:
# Features: positional columns 3..12 (CreditScore through EstimatedSalary);
# the leading identifier columns (RowNumber, CustomerId, Surname) are skipped.
X = df.iloc[:, 3:13].to_numpy()
# Target: the last column (Exited).
y = df.iloc[:, -1].to_numpy()

# Sanity-check the resulting dimensions.
(X.shape, y.shape)

((10000, 10), (10000,))

In [6]:
# Display the raw feature matrix (object dtype: it still mixes strings and numbers).
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

# Data Preprocessing

In [7]:
# Gender lives in column 2; map its string labels to integer codes in place.
Gender_encoder = LabelEncoder().fit(X[:, 2])
X[:, 2] = Gender_encoder.transform(X[:, 2])

In [8]:
# Show the matrix after label-encoding Gender, followed by its shape.
print(X, X.shape, sep='\n')

[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]
(10000, 10)


In [9]:
# One-hot encode Geography (column 1).
# The encoder is left at its default sparse output and densified with
# .toarray() rather than passing `sparse=False`: that keyword was renamed to
# `sparse_output` in scikit-learn 1.2 and removed in 1.4, so the original call
# raises TypeError on current releases. This form works on old and new versions.
one_hot_encoder = OneHotEncoder(categories='auto')
X_encoded = one_hot_encoder.fit_transform(X[:, [1]]).toarray()

# Replace column 1 with the one-hot encoded values (one column per category).
# NOTE: this rebinds X, so the cell is not idempotent - re-run the notebook
# from the feature-selection cell instead of executing this cell twice.
X = np.concatenate((X[:, :1], X_encoded, X[:, 2:]), axis=1)




In [10]:
# Show the matrix after one-hot encoding Geography, followed by its new shape.
print(X, X.shape, sep='\n')

[[619 1.0 0.0 ... 1 1 101348.88]
 [608 0.0 0.0 ... 0 1 112542.58]
 [502 1.0 0.0 ... 1 0 113931.57]
 ...
 [709 1.0 0.0 ... 0 1 42085.58]
 [772 0.0 1.0 ... 1 0 92888.52]
 [792 1.0 0.0 ... 1 0 38190.78]]
(10000, 12)


# Splitting the data for modeling

In [11]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Feature Scaling


In [12]:
# Learn the per-column mean and standard deviation from the training split only,
# then apply that same transformation to both splits so no test-set statistics
# leak into training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [13]:
# Display the standardized training features produced by the scaler.
X_train

array([[ 0.35649971,  1.00150113, -0.57946723, ...,  0.64920267,
         0.97481699,  1.36766974],
       [-0.20389777, -0.99850112,  1.72572313, ...,  0.64920267,
         0.97481699,  1.6612541 ],
       [-0.96147213, -0.99850112, -0.57946723, ...,  0.64920267,
        -1.02583358, -0.25280688],
       ...,
       [ 0.86500853,  1.00150113, -0.57946723, ..., -1.54035103,
        -1.02583358, -0.1427649 ],
       [ 0.15932282,  1.00150113, -0.57946723, ...,  0.64920267,
        -1.02583358, -0.05082558],
       [ 0.47065475, -0.99850112,  1.72572313, ...,  0.64920267,
         0.97481699, -0.81456811]])

# Creating the Artificial Neural Network

In [14]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation and batch shuffling (and therefore the
# reported loss/accuracy) are repeatable across "Restart & Run All", as far as
# the backend allows.
keras.utils.set_random_seed(42)

# Initialise the ANN as a linear stack of layers
classifer = Sequential()

# First hidden layer: 6 ReLU units. No explicit input layer is declared, so
# Keras infers the input width from the data on the first call to fit().
classifer.add(Dense(6, activation='relu', kernel_initializer='uniform'))


In [15]:
# Stack a further fully connected hidden layer: 6 ReLU units, uniform init.
classifer.add(
    Dense(units=6, activation='relu', kernel_initializer='uniform')
)

In [16]:
# Output layer: a single sigmoid unit, so each prediction is a value in (0, 1).
classifer.add(
    Dense(units=1, activation='sigmoid', kernel_initializer='uniform')
)

In [17]:
# Configure training: Adam optimizer, binary cross-entropy loss to match the
# single sigmoid output, and accuracy reported as the tracked metric.
classifer.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [18]:
# Fit the network on the scaled training split: mini-batches of 10 samples,
# 15 passes over the data. The returned History object is the cell's output.
classifer.fit(x=X_train, y=y_train, batch_size=10, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x7e4d79c399c0>

In [19]:
# Score the held-out rows; each output is the sigmoid activation for one
# customer, i.e. a value between 0 and 1 rather than a hard 0/1 label.
classifer.predict(x=X_test)



array([[0.13934359],
       [0.08199137],
       [0.40884194],
       ...,
       [0.47124115],
       [0.03135199],
       [0.2175855 ]], dtype=float32)

In [21]:
# Measure loss and accuracy on the held-out test split and report both.
test_loss, test_acc = classifer.evaluate(X_test, y_test)
print(
    f'loss cost value {test_loss}',
    f'accuracy value for testing {test_acc}',
    sep='\n',
)

loss cost value 0.3923177421092987
accuracy value for testing 0.8414999842643738
