In [101]:
# ANN
# dataset description :-
# fictional dataset of a bank having 10,000 customers and therefore, 10,000 rows
# the bank is facing a high churn rate (the rate at which people are leaving the bank) - we need to predict which of the customers are
# at highest risk of leaving - a binary classification problem
# columns correspond to diff features of the customers like gender, country, num of products, age, balance, etc
# last column is whether the customer exited or not

In [67]:
# libraries :-
# 1. Theano - open-source numerical computation library - very fast for numerical computations - based on numpy syntax - can run
# on CPU as well as on GPU (much more powerful)
# 2. TensorFlow
# 3. Keras - wraps the above 2 libraries
# 1 and 2 are used to build deep learning models from scratch (for R&D purposes).

DATA PREPROCESSING

In [68]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [69]:
# Load the churn data set from a CSV file in the current working directory.
dataset = pd.read_csv('Churn_Modelling.csv')

In [70]:
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [71]:
# Columns 0-2 (RowNumber, CustomerId, Surname) play no role in churn, so the
# feature matrix is columns 3..12 and the target is column 13 (Exited).
feature_columns = slice(3, 13)
target_column = 13

X = dataset.iloc[:, feature_columns].values
y = dataset.iloc[:, target_column].values
X.shape, y.shape

((10000, 10), (10000,))

In [72]:
X[0]

array([619, 'France', 'Female', 42, 2, 0.0, 1, 1, 1, 101348.88],
      dtype=object)

In [73]:
y

array([1, 0, 1, ..., 1, 1, 0], dtype=int64)

In [74]:
y[0] # y contains values 0 or 1

1

In [75]:
# encode categorical variables (country and gender) before splitting the data 

In [76]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Replace the two text columns with integer codes, one encoder per column:
#   column 1 -> country (encoder1); it is one-hot encoded in a later step
#   column 2 -> gender  (encoder2); it has only 2 categories, and one dummy column
#               would be dropped anyway to avoid the dummy variable trap, so a
#               single 0/1 column is enough and no one-hot encoding is needed
encoder1, encoder2 = LabelEncoder(), LabelEncoder()
for column_index, encoder in ((1, encoder1), (2, encoder2)):
    X[:, column_index] = encoder.fit_transform(X[:, column_index])

In [123]:
# One-hot encode the country column (index 1).
# OneHotEncoder's `categorical_features` argument was deprecated in scikit-learn
# 0.20 and removed in 0.22; ColumnTransformer is the documented replacement.
from sklearn.compose import ColumnTransformer

ohe = ColumnTransformer(
    transformers=[('country', OneHotEncoder(categories='auto'), [1])],
    remainder='passthrough',  # keep all other columns, placed after the country dummies
    sparse_threshold=0,       # always return a dense array
)
# The stacked result has object dtype (X is an object array), so cast to float.
X = np.array(ohe.fit_transform(X), dtype=float)
X[0], X[0].shape

array([0.0000000e+00, 1.0000000e+00, 0.0000000e+00, 6.0800000e+02,
       0.0000000e+00, 4.1000000e+01, 1.0000000e+00, 8.3807860e+04,
       1.0000000e+00, 0.0000000e+00, 1.0000000e+00, 1.1254258e+05])

In [78]:
# Drop one of the country dummy columns created by the one-hot encoding to avoid
# the dummy variable trap (the dummies always sum to 1, so one is redundant).
# NOTE: this cell is not idempotent - every re-run removes one more column.
X = X[:,1:] # keep everything except the first column (index 0)
X[0], X[0].shape

(array([0.0000000e+00, 0.0000000e+00, 6.1900000e+02, 0.0000000e+00,
        4.2000000e+01, 2.0000000e+00, 0.0000000e+00, 1.0000000e+00,
        1.0000000e+00, 1.0000000e+00, 1.0134888e+05]), (11,))

In [79]:
X.shape, y.shape

((10000, 11), (10000,))

In [80]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [81]:
X_train.shape, y_train.shape

((8000, 11), (8000,))

In [82]:
X_test.shape, y_test.shape

((2000, 11), (2000,))

In [83]:
from sklearn.preprocessing import StandardScaler

# Feature scaling is highly recommended for neural networks: it keeps one
# independent variable from dominating the others.
# The scaler is fitted on the training set only, and the same fitted scaler is
# then applied to both the training and the test set.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [86]:
X_train[0]

array([-0.5698444 ,  1.74309049,  0.16958176, -1.09168714, -0.46460796,
        0.00666099, -1.21571749,  0.8095029 ,  0.64259497, -1.03227043,
        1.10643166])

In [87]:
X_test[0]

array([ 1.75486502, -0.57369368, -0.55204276, -1.09168714, -0.36890377,
        1.04473698,  0.8793029 , -0.92159124,  0.64259497,  0.9687384 ,
        1.61085707])

BUILDING THE ANN MODEL

In [96]:
import keras
from keras.models import Sequential 
from keras.layers import Dense

In [97]:
# Initialise an empty Sequential model; layers are added to it one by one below.
classifier = Sequential()

In [98]:
# Input layer + first hidden layer.
# Rule of thumb for the hidden-layer size: the average of the number of input
# nodes and output nodes (alternatively: parameter tuning / cross validation).
#   input nodes  = 11 (X has 11 features)
#   output nodes = 1  (corresponding to the y value)
#   hidden nodes = 6  (average of 11 and 1)
first_hidden_layer = Dense(
    units=6,
    kernel_initializer='uniform',
    activation='relu',
    input_dim=11,  # input size is compulsory only on the first layer
)
classifier.add(first_hidden_layer)

In [99]:
# Second hidden layer: same size, initializer and activation as the first one.
second_hidden_layer = Dense(
    units=6,
    kernel_initializer='uniform',
    activation='relu',
)
classifier.add(second_hidden_layer)

In [100]:
# Output layer: a single sigmoid unit, so the network outputs one value in (0, 1).
# With more than 2 categories we would use softmax instead - similar to sigmoid,
# but for multiple categories.
output_layer = Dense(
    units=1,
    kernel_initializer='uniform',
    activation='sigmoid',
)
classifier.add(output_layer)

In [102]:
# Compile the ANN.
#   optimizer - the algorithm used to find the optimal set of weights in the
#               network; adam is a very efficient type of stochastic gradient
#               optimization
#   loss      - the loss function minimised by the optimizer
#               binary_crossentropy      -> binary classification
#               categorical_crossentropy -> multiclass classification
#   metrics   - values reported during training/evaluation to monitor the model;
#               they are not used to update the weights
compile_options = {
    'optimizer': 'adam',
    'loss': 'binary_crossentropy',
    'metrics': ['accuracy'],
}
classifier.compile(**compile_options)

In [104]:
# Up to this point the model has only been defined and compiled; no connection
# to the data has been made yet. Fitting trains the ANN on the training set.
# The keyword for the number of passes over the training data is `epochs`
# (plural); `epoch` is not an argument of fit().
classifier.fit(X_train, y_train, batch_size=10, epochs=100)

  This is separate from the ipykernel package so we can avoid doing imports until


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x1b87c6c1470>

In [106]:
# accuracy converging at about 83.6%

MAKING PREDICTIONS AND EVALUATING THE MODEL

In [107]:
# Run the trained network on the (already scaled) test features.
y_pred = classifier.predict(X_test)

In [109]:
y_pred # gives probabilities that a customer will leave the bank

array([[0.20845954],
       [0.31870234],
       [0.16355684],
       ...,
       [0.16943139],
       [0.14612108],
       [0.10946299]], dtype=float32)

In [116]:
# Turn the predicted probabilities into boolean labels:
# True when the probability is strictly greater than 0.5, False otherwise.
y_pred = np.greater(y_pred, 0.5)
y_pred, y_pred.shape

(array([[False],
        [False],
        [False],
        ...,
        [False],
        [False],
        [False]]), (2000, 1))

In [115]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the test-set predictions
# (rows = actual class, columns = predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[1544,   51],
       [ 259,  146]], dtype=int64)

In [117]:
# accuracy = (no of correct preds)/(no of total preds)
# Correct predictions are the diagonal of the confusion matrix, and all
# predictions are the sum of its entries. Deriving the value from `cm` (instead
# of hard-coding the counts of one particular training run) keeps it correct
# whenever the model is retrained.
accuracy = np.trace(cm) / np.sum(cm)
accuracy # on test set

0.845

In [118]:
# results
# training set accuracy = ~86%
# test set accuracy = ~84.5% (0.845 from the confusion matrix above)

PREDICTING RESULTS FOR NEW TEST POINT

In [129]:
# Feature order must match the training matrix:
# [country dummy, country dummy, CreditScore, Gender, Age, Tenure, Balance,
#  NumOfProducts, HasCrCard, IsActiveMember, EstimatedSalary]
new_customer = [0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]

# Scale the single row (shape (1, 11)) with the scaler fitted on the training
# set, then ask the network for the churn probability.
new_test_point = sc.transform(np.array([new_customer]))
new_pred = classifier.predict(new_test_point)



In [130]:
new_pred

array([[0.07640162]], dtype=float32)

In [132]:
# Apply the same 0.5 cut-off to the new customer's predicted probability.
new_pred = np.greater(new_pred, 0.5)
new_pred

array([[False]])

In [133]:
# the predicted probability (~0.08) is below 0.5, so the model predicts that this customer will not leave the bank