In [2]:
# Part 1 - Data Preprocessing

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
# Load the bank-customer churn data set.
dataset = pd.read_csv('Churn_Modelling.csv')

# The leading columns (row number, name, ...) say nothing about whether a
# customer leaves the bank, so the features are the columns at positions
# 3 to 12 (inclusive); the column at position 13 is the target.
features = dataset.iloc[:, 3:13]
target = dataset.iloc[:, 13]
X = features.values   # feature matrix
y = target.values     # dependent / predicted variable

# Show both arrays: X (features) first, then y (target).
for array in (X, y):
    print(array)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]
[1 0 1 ... 1 1 0]


In [4]:
# First five observations; each row has the 10 input columns
print(X[:5])

[[619 'France' 'Female' 42 2 0.0 1 1 1 101348.88]
 [608 'Spain' 'Female' 41 1 83807.86 1 0 1 112542.58]
 [502 'France' 'Female' 42 8 159660.8 3 1 0 113931.57]
 [699 'France' 'Female' 39 1 0.0 2 0 0 93826.63]
 [850 'Spain' 'Female' 43 2 125510.82 1 1 1 79084.1]]


In [5]:
# Encoding categorical data
# Two feature columns still hold strings and must become numbers before
# modelling: the region (index 1) and the gender (index 2).
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Label encoder no. 1 handles the region column (index 1 in the features).
# The three region names are replaced by the integer codes 0, 1 and 2.
labelencoder_X_1 = LabelEncoder()
labelencoder_X_1.fit(X[:, 1])
X[:, 1] = labelencoder_X_1.transform(X[:, 1])

In [6]:
# Inspect the first five rows after encoding the region column
print(X[:5, :])

[[619 0 'Female' 42 2 0.0 1 1 1 101348.88]
 [608 2 'Female' 41 1 83807.86 1 0 1 112542.58]
 [502 0 'Female' 42 8 159660.8 3 1 0 113931.57]
 [699 0 'Female' 39 1 0.0 2 0 0 93826.63]
 [850 2 'Female' 43 2 125510.82 1 1 1 79084.1]]


In [7]:
# Label encoder no. 2 handles the gender column (index 2 in the features).
# LabelEncoder numbers the classes in sorted order, so 'Female' becomes 0
# and 'Male' becomes 1.
labelencoder_X_2 = LabelEncoder()
gender_codes = labelencoder_X_2.fit_transform(X[:, 2])
X[:, 2] = gender_codes

In [8]:
# Inspect the first five rows after encoding the gender column
print(X[:5, :])

[[619 0 0 42 2 0.0 1 1 1 101348.88]
 [608 2 0 41 1 83807.86 1 0 1 112542.58]
 [502 0 0 42 8 159660.8 3 1 0 113931.57]
 [699 0 0 39 1 0.0 2 0 0 93826.63]
 [850 2 0 43 2 125510.82 1 1 1 79084.1]]


In [34]:
from sklearn.preprocessing import OneHotEncoder

# Toy example: a categorical column next to a numeric one.
# numpy stores the mixed rows as strings, so the label codes written back
# into column 0 end up as strings as well.
toy_rows = [
    ['Male', 1],
    ['Female', 3],
    ['Trans', 2],
    ['Male', 10],
    ['Female', 7],
]
exampl = np.array(toy_rows)

# Replace the category names in column 0 by integer label codes.
labenc = LabelEncoder()
exampl[:, 0] = labenc.fit_transform(exampl[:, 0])
exampl

array([['1', '1'],
       ['0', '3'],
       ['2', '2'],
       ['1', '10'],
       ['0', '7']], dtype='<U6')

In [37]:
# One-hot encode column 0 of the toy array and keep the other column as-is.
# OneHotEncoder's `categorical_features` argument was deprecated in
# scikit-learn 0.20 and removed in 0.22; ColumnTransformer (>= 0.20) is the
# replacement for encoding only selected columns.
from sklearn.compose import ColumnTransformer

enc = ColumnTransformer(
    transformers=[('onehot', OneHotEncoder(), [0])],
    remainder='passthrough',   # untouched columns are appended on the right
    sparse_threshold=0,        # always return a dense array
)
# The passthrough column is still stored as strings, so cast the stacked
# result to float to get a purely numeric array.
exampl = enc.fit_transform(exampl).astype(float)
exampl

array([[ 0.,  1.,  0.,  1.],
       [ 1.,  0.,  0.,  3.],
       [ 0.,  0.,  1.,  2.],
       [ 0.,  1.,  0., 10.],
       [ 1.,  0.,  0.,  7.]])

In [38]:
# Now creating dummy variables for the region column (index 1).
# OneHotEncoder's `categorical_features` argument was deprecated in
# scikit-learn 0.20 and removed in 0.22; ColumnTransformer (>= 0.20) is the
# replacement for encoding only selected columns.
from sklearn.compose import ColumnTransformer

onehotencoder = ColumnTransformer(
    transformers=[('region_onehot', OneHotEncoder(), [1])],
    remainder='passthrough',   # remaining columns are appended on the right
    sparse_threshold=0,        # always return a dense array
)
# X is an object array at this point; cast the stacked result to float.
X = onehotencoder.fit_transform(X).astype(float)
# Drop the first dummy column: with three regions two dummies are enough
# (avoids the dummy-variable trap).
X = X[:, 1:]

In [39]:
# Inspect the first five rows after creating the dummy variables
print(X[:5, :])

[[0.0000000e+00 0.0000000e+00 6.1900000e+02 0.0000000e+00 4.2000000e+01
  2.0000000e+00 0.0000000e+00 1.0000000e+00 1.0000000e+00 1.0000000e+00
  1.0134888e+05]
 [1.0000000e+00 0.0000000e+00 6.0800000e+02 0.0000000e+00 4.1000000e+01
  1.0000000e+00 8.3807860e+04 1.0000000e+00 0.0000000e+00 1.0000000e+00
  1.1254258e+05]
 [0.0000000e+00 0.0000000e+00 5.0200000e+02 0.0000000e+00 4.2000000e+01
  8.0000000e+00 1.5966080e+05 3.0000000e+00 1.0000000e+00 0.0000000e+00
  1.1393157e+05]
 [0.0000000e+00 0.0000000e+00 6.9900000e+02 0.0000000e+00 3.9000000e+01
  1.0000000e+00 0.0000000e+00 2.0000000e+00 0.0000000e+00 0.0000000e+00
  9.3826630e+04]
 [1.0000000e+00 0.0000000e+00 8.5000000e+02 0.0000000e+00 4.3000000e+01
  2.0000000e+00 1.2551082e+05 1.0000000e+00 1.0000000e+00 1.0000000e+00
  7.9084100e+04]]


In [40]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split

# 20% of the observations are held out for testing; random_state is fixed
# so the same split is produced on every run.
split = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split

In [42]:
# Peek at the first rows of the training and test features
(X_train[:5], X_test[:2])

(array([[1.0000000e+00, 0.0000000e+00, 6.6700000e+02, 0.0000000e+00,
         3.4000000e+01, 5.0000000e+00, 0.0000000e+00, 2.0000000e+00,
         1.0000000e+00, 0.0000000e+00, 1.6383064e+05],
        [0.0000000e+00, 1.0000000e+00, 4.2700000e+02, 1.0000000e+00,
         4.2000000e+01, 1.0000000e+00, 7.5681520e+04, 1.0000000e+00,
         1.0000000e+00, 1.0000000e+00, 5.7098000e+04],
        [0.0000000e+00, 0.0000000e+00, 5.3500000e+02, 0.0000000e+00,
         2.9000000e+01, 2.0000000e+00, 1.1236734e+05, 1.0000000e+00,
         1.0000000e+00, 0.0000000e+00, 1.8563076e+05],
        [1.0000000e+00, 0.0000000e+00, 6.5400000e+02, 1.0000000e+00,
         4.0000000e+01, 5.0000000e+00, 1.0568363e+05, 1.0000000e+00,
         1.0000000e+00, 0.0000000e+00, 1.7361709e+05],
        [1.0000000e+00, 0.0000000e+00, 8.5000000e+02, 0.0000000e+00,
         5.7000000e+01, 8.0000000e+00, 1.2677630e+05, 2.0000000e+00,
         1.0000000e+00, 1.0000000e+00, 1.3229849e+05]]),
 array([[0.0000000e+00, 1.0000000

In [44]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler

# Only the scaler object is created here; it is fitted on the training set
# and applied to the training and test sets in the cells that follow.
sc = StandardScaler()

In [45]:
# Learn the scaling parameters from the training set, then standardise it
sc.fit(X_train)
X_train = sc.transform(X_train)
X_train

array([[ 1.74309049, -0.5698444 ,  0.16958176, ...,  0.64259497,
        -1.03227043,  1.10643166],
       [-0.57369368,  1.75486502, -2.30455945, ...,  0.64259497,
         0.9687384 , -0.74866447],
       [-0.57369368, -0.5698444 , -1.19119591, ...,  0.64259497,
        -1.03227043,  1.48533467],
       ...,
       [-0.57369368, -0.5698444 ,  0.9015152 , ...,  0.64259497,
        -1.03227043,  1.41231994],
       [ 1.74309049, -0.5698444 , -0.62420521, ...,  0.64259497,
         0.9687384 ,  0.84432121],
       [-0.57369368,  1.75486502, -0.28401079, ...,  0.64259497,
        -1.03227043,  0.32472465]])

In [46]:
# Scale the test set with the scaler already fitted on the training set
# (transform only, no re-fit), so both sets share the same scaling.
X_test = sc.transform(X_test)

In [54]:
# First two scaled test observations
X_test[:2]

array([[-0.57369368,  1.75486502, -0.55204276, -1.09168714, -0.36890377,
         1.04473698,  0.8793029 , -0.92159124,  0.64259497,  0.9687384 ,
         1.61085707],
       [-0.57369368, -0.5698444 , -1.31490297, -1.09168714,  0.10961719,
        -1.031415  ,  0.42972196, -0.92159124,  0.64259497, -1.03227043,
         0.49587037]])

## Part 2 - Now let's make the ANN!

### Listing out the steps involved in training the ANN with Stochastic Gradient Descent
1) Randomly initialize the weights to small numbers close to 0 (but not 0).
2) Input the first observation of your dataset into the input layer, with each feature in one input node.
3) Forward propagation: from left to right, the neurons are activated in a way that the impact of each neuron's activation is limited by the weights. Propagate the activations until you get the predicted result y.
4) Compare the predicted result with the actual result. Measure the generated error.
5) Back-propagation: from right to left, the error is back-propagated. Update the weights according to how much they are
responsible for the error. The learning rate tells us by how much we update the weights.
6) Repeat Steps 1 to 5 and update the weights after each observation (Reinforcement Learning).
Or: repeat Steps 1 to 5 but update the weights only after a batch of observations (Batch Learning).  
7) When the whole training set has passed through the ANN, that completes an epoch. Redo more epochs.

In [51]:
# Importing the Keras libraries and packages
import keras
# Sequential: for building the neural network layer by layer
from keras.models import Sequential
# Dense: the layer type used for every layer of this network; its
# kernel_initializer argument controls how the weights are initialised
from keras.layers import Dense

In [62]:
# Initialising the ANN
# There are two ways of defining a deep learning model:
#   1) defining each layer one by one
#   2) defining a graph
# The layer-by-layer approach is used here.
classifier = Sequential()
# No arguments are passed to Sequential() because the layers are added
# manually in the cells that follow.

In [63]:
# Adding the input layer and the first hidden layer
# How many nodes a hidden layer needs is still an open question. There is no
# firm rule, but a common starting point is the average of the number of
# input and output nodes: (11 + 1) / 2 = 6, hence units=6.
#   kernel_initializer="uniform" -> initial weights drawn from a uniform distribution
#   activation="relu"            -> rectifier activation function
#   input_dim=11                 -> number of nodes in the input layer; given
#                                   only for this first layer, not for later ones
first_hidden = Dense(units=6, kernel_initializer="uniform", activation="relu", input_dim=11)
classifier.add(first_hidden)

In [64]:
# Adding the second hidden layer
# Same settings as the first hidden layer; input_dim is not repeated here.
second_hidden = Dense(units=6, kernel_initializer="uniform", activation="relu")
classifier.add(second_hidden)

In [65]:
# Adding the output layer
# A single sigmoid unit, so the network outputs one value between 0 and 1.
output_layer = Dense(units=1, kernel_initializer="uniform", activation="sigmoid")
classifier.add(output_layer)

In [66]:
# The sigmoid activation function is used when we need the probability of a binary outcome (two categories), similar to logistic regression.
# Switch to softmax when the dependent variable has more than two categories.

In [67]:
# Compiling the ANN
# Adam optimiser, binary cross-entropy loss (matches the single sigmoid
# output), and accuracy reported as the training metric.
classifier.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [68]:
# Fitting the ANN to the Training set
# Weights are updated after every batch of 10 observations, for 100 passes
# over the training set. `epochs` is the Keras 2 name of this argument; the
# Keras 1 spelling `nb_epoch` is deprecated and rejected by newer releases.
classifier.fit(X_train, y_train, batch_size=10, epochs=100)

  


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x15503c50>

In [69]:
# Part 3 - Making the predictions and evaluating the model
# Predicting the Test set results
churn_probability = classifier.predict(X_test)
# Threshold at 0.5: values above 0.5 become True, everything else False
y_pred = churn_probability > 0.5
print(y_pred)

[[False]
 [False]
 [False]
 ...
 [False]
 [False]
 [False]]


In [70]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix

# Rows are the actual classes, columns the predicted classes
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[1559   36]
 [ 286  119]]


In [71]:
# Accuracy = correctly classified observations / all observations.
# The diagonal of the confusion matrix holds the correct predictions.
# Computing from `cm` (instead of typing in numbers copied from a previous
# run's output) keeps the value right whenever the model is retrained.
accuracy = cm.diagonal().sum() / cm.sum()
print(accuracy)

0.839
