In [34]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [35]:
# Read the churn-modelling CSV into a DataFrame and preview its first three rows.
dataset = pd.read_csv(
    filepath_or_buffer='drive/MyDrive/Colab Notebooks/ANN/Churn_Modelling.csv',
)
dataset.head(3)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1


In [36]:
# The first three columns (RowNumber, CustomerId, Surname) are of no use to the
# model, so they are left out of the features.
# Columns are selected by label instead of by position (previously 3:13 and 13:)
# so that a reordered or extended CSV cannot silently shift the feature/target split.
x = dataset.loc[:, 'CreditScore':'EstimatedSalary']
# Double brackets keep the target as a single-column DataFrame, as before.
y = dataset[['Exited']]

In [37]:
# Geography and Gender are text columns, so both are converted to indicator
# (dummy) columns. drop_first=True drops the first level of each feature so the
# remaining indicator columns are not redundant with one another.
geography, gender = (
    pd.get_dummies(x[column], drop_first=True)
    for column in ('Geography', 'Gender')
)

In [38]:
# Append the newly encoded indicator columns to the right of the feature frame.
x = pd.concat(objs=[x, geography, gender], axis='columns')

In [39]:
# The encoded columns now carry the Geography/Gender information, so the raw
# text columns are dropped: they are redundant and are not numeric.
x = x.drop(columns=['Geography', 'Gender'])

In [40]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split repeatable between runs.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=0,
    test_size=0.2,
)

In [41]:
# Standardise the features before training the ANN: inputs on a comparable scale
# help gradient-based training converge.
# The scaler is fitted on the training split only and then applied to both
# splits, so nothing is learned from the test data.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [42]:
# Creating the Artificial Neural Network
# We import keras and other dependent libraries like Relu, loss fcn etc.
import keras
# We use "Sequential" to build models as a plain layer-by-layer stack, like this ANN.
# It does not support layer sharing or multiple-input models.
from keras.models import Sequential
# Dense is the fully connected layer used in most NN architectures.
# Each neuron in a Dense layer receives input from every neuron of the previous layer.
from keras.layers import Dense
from keras.layers import LeakyReLU, PReLU, ELU
# Dropout is a regularization layer, typically used when the NN has many layers.
# It is used to prevent overfitting.
from keras.layers import Dropout

In [43]:
# Initializing the ANN
# This creates an empty Sequential model; the layers are added to it in the cells that follow.
classifier = Sequential()

In [44]:
# Add the first hidden layer, which also declares the input size of the network.
# - units=6: six neurons in this hidden layer.
# - kernel_initializer='he_uniform': He uniform weight initialisation, used with ReLU.
# - activation='relu': ReLU activation function.
# - input_dim: must be given on the first layer. It is read from the training
#   matrix instead of being hard-coded (it was 11), so the layer keeps matching
#   the data if the set of feature columns changes.
classifier.add(
    Dense(
        units=6,
        kernel_initializer='he_uniform',
        activation='relu',
        input_dim=x_train.shape[1],
    )
)

In [45]:
# Second hidden layer: six units, He uniform initialisation and ReLU activation.
# No input size is passed here, unlike on the first layer.
classifier.add(Dense(6, activation='relu', kernel_initializer='he_uniform'))

In [46]:
# Output layer for the binary classification problem.
# There is a single target column, so the layer has one unit (units=1), and the
# sigmoid activation squashes its output into the (0, 1) range.
# Weights use the Xavier/Glorot uniform initialisation.
classifier.add(Dense(1, activation='sigmoid', kernel_initializer='glorot_uniform'))

In [47]:
# Print the layer-by-layer summary of the model (output shapes and parameter counts).
classifier.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 6)                 72        
                                                                 
 dense_5 (Dense)             (None, 6)                 42        
                                                                 
 dense_6 (Dense)             (None, 1)                 7         
                                                                 
Total params: 121
Trainable params: 121
Non-trainable params: 0
_________________________________________________________________


In [48]:
# Configure training: binary cross-entropy as the loss, Adam as the optimizer
# that updates the weights, and accuracy as the reported metric.
classifier.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [49]:
# Train for 100 epochs in mini-batches of 10 rows, holding out 33% of the
# training data for validation. The returned history object is kept in
# model_history.
model_history = classifier.fit(
    x_train,
    y_train,
    epochs=100,
    batch_size=10,
    validation_split=0.33,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [50]:
# Predict on the test data and turn the sigmoid outputs into boolean class
# labels: True when the predicted value is above 0.5.
y_pred = classifier.predict(x_test) > 0.5

In [51]:
# Build the confusion matrix from the true test labels and the thresholded predictions.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [52]:
# Display the confusion matrix.
cm

array([[1505,   90],
       [ 195,  210]])

In [53]:
# Compute and display the accuracy on the test set.
# accuracy_score expects (y_true, y_pred); the arguments are passed in that
# order, matching the confusion_matrix(y_test, y_pred) call. The accuracy value
# itself does not depend on the order.
from sklearn.metrics import accuracy_score
score = accuracy_score(y_test, y_pred)
score

0.8575