# Artificial Neural Network

https://keras.io/getting_started/

### Data Preprocessing :

In [1]:
# Importing the libraries
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('Churn_data.csv')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
# as we dont need the features like RowNumber	CustomerId	Surnam.
# iam dropping those features 
df = df.drop(["RowNumber","CustomerId","Surname"],axis=1)

In [4]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [5]:
#Dividing data into x and y
x = df.iloc[:,:-1]
y = df.iloc[:,-1]

### One-hot encoding 

In [6]:
#Create dummy variables 
Geography = pd.get_dummies(x["Geography"],drop_first=True)
Gender = pd.get_dummies(x["Gender"],drop_first=True)

In [7]:
#Concatenate the Data Frames
x = pd.concat([x,Geography,Gender],axis=1)
x.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Germany,Spain,Male
0,619,France,Female,42,2,0.0,1,1,1,101348.88,0,0,0
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,1,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,0,0,0
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,1,0


In [8]:
#Dropping unnessary columns 
x = x.drop(["Geography","Gender"],axis=1)

In [9]:
x.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Germany,Spain,Male
0,619,42,2,0.0,1,1,1,101348.88,0,0,0
1,608,41,1,83807.86,1,0,1,112542.58,0,1,0
2,502,42,8,159660.8,3,1,0,113931.57,0,0,0
3,699,39,1,0.0,2,0,0,93826.63,0,0,0
4,850,43,2,125510.82,1,1,1,79084.1,0,1,0


In [10]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3,random_state=5)

In [11]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.fit_transform(x_test)

In [12]:
x_train

array([[-1.44395693e+00,  2.27797473e+00,  1.04411743e+00, ...,
        -5.85268586e-01, -5.72071527e-01,  9.07409229e-01],
       [-4.13114515e-01,  5.72265460e-01, -6.93265378e-01, ...,
        -5.85268586e-01,  1.74803316e+00, -1.10203860e+00],
       [-1.68105068e+00,  2.08845148e+00,  1.68774330e-03, ...,
         1.70861725e+00, -5.72071527e-01, -1.10203860e+00],
       ...,
       [-6.91441966e-01, -7.54397306e-01, -3.45788818e-01, ...,
        -5.85268586e-01, -5.72071527e-01, -1.10203860e+00],
       [ 2.98166750e-01, -2.80589175e-01,  3.49164304e-01, ...,
        -5.85268586e-01, -5.72071527e-01,  9.07409229e-01],
       [-1.66043383e+00,  1.23559684e+00, -1.38821850e+00, ...,
         1.70861725e+00, -5.72071527e-01, -1.10203860e+00]])

In [13]:
x_test

array([[ 1.22566172, -0.08036346,  0.32349206, ..., -0.56349184,
         1.73051257,  0.92419744],
       [-1.22747112, -0.46750174,  0.66544983, ..., -0.56349184,
        -0.57786347, -1.08201987],
       [ 0.80810719, -0.46750174,  1.34936539, ..., -0.56349184,
        -0.57786347, -1.08201987],
       ...,
       [ 2.07120963, -0.75785546, -1.38629683, ..., -0.56349184,
        -0.57786347,  0.92419744],
       [-0.24621799, -1.33856289,  0.66544983, ..., -0.56349184,
        -0.57786347,  0.92419744],
       [ 0.86030151, -0.17714803, -0.70238127, ..., -0.56349184,
        -0.57786347,  0.92419744]])

# Creating ANN

In [14]:
# Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LeakyReLU,PReLU,ELU
from keras.layers import Dropout

In [15]:
# Initialising the ANN
# this is an empty nueral network
classifier = Sequential()

In [16]:
#with respect to keras 1.0 this is syntax
classifier.add(Dense(output_dim = 6,init = 'he_uniform',activation='relu',input_dim = 11))

TypeError: __init__() missing 1 required positional argument: 'units'

In [17]:
# output_dim = 6 means iam going to consider 6 neurons
# init = 'he_uniform' means how wiegts need to intilize.
#input_dim is 11 because we are giving 11 inputs to that neuron

In [18]:
#after keras 1.0 
# output_dim  parameter changed to "units"
# init parameter changed to "kernel_initializer"
classifier.add(Dense(units = 6,kernel_initializer='he_uniform',activation='relu',input_dim = 11))

In [19]:
# Adding the second hidden layer
classifier.add(Dense(units = 6,kernel_initializer='he_uniform',activation='relu'))

In [20]:
# Adding the output layer
classifier.add(Dense(units = 1, kernel_initializer = 'glorot_uniform', activation = 'sigmoid'))

![image-4.png](attachment:image-4.png)

After adding two hidden layers and later we will do hyper optimizer technique to how many actually can i use hidden layers.

As of now we are just intilizing 2 hidden layers.later we will find how many layer should i take.

In [21]:
#to get summary about your nueral network
classifier.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 6)                 72        
                                                                 
 dense_1 (Dense)             (None, 6)                 42        
                                                                 
 dense_2 (Dense)             (None, 1)                 7         
                                                                 
Total params: 121
Trainable params: 121
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Compiling the ANN
classifier.compile(optimizer = 'Adamax', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [23]:
# we are using "Adamax" as our optimizer.
# and it better to use loss as "binary_crossentropy" for binary class classification
# where as "categorical_crossentropy" use for multclass classification

In [24]:
# Fitting the ANN to the Training set
model_history=classifier.fit(x_train, y_train,validation_split=0.33, batch_size = 10, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [48]:
# Predicting the Test set results
y_pred = classifier.predict(x_test)



In [26]:
y_pred = (y_pred > 0.5) 

In [27]:
y_pred

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [28]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

In [29]:
# Calculate the Accuracy
from sklearn.metrics import accuracy_score
score=accuracy_score(y_pred,y_test)

In [30]:
score

0.834

#### Other way to find accuracy:

In [31]:
y_pred_new = [ 1 if i>0.5 else 0  for i in y_pred]

In [32]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

In [33]:
cm

array([[2306,   97],
       [ 401,  196]], dtype=int64)

In [34]:
# Calculate the Accuracy
from sklearn.metrics import accuracy_score
score=accuracy_score(y_pred_new,y_test)

In [35]:
score

0.834

# Trying Other neural network 
* with one more hidden layer and 
* different no of neurons and 
* with he_normal as weights intilization tech

In [36]:
# Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LeakyReLU,PReLU,ELU
from keras.layers import Dropout

In [37]:
# Initialising the ANN
# this is an empty nueral network
classifier2 = Sequential()

In [38]:
# Adding the input layer and the first hidden layer
#added newly is units = 10 and kernel_initializer='he_normal'
classifier2.add(Dense(units = 10,kernel_initializer='he_normal',activation='relu',input_dim = 11))

In [39]:
# Adding the 2rd hidden layer
#added newly is units = 20 and kernel_initializer='he_normal'
classifier2.add(Dense(units = 20,kernel_initializer='he_normal',activation='relu',input_dim = 11))

In [40]:
# Adding the 2rd hidden layer
#added newly is units = 15 and kernel_initializer='he_normal'
classifier2.add(Dense(units = 15,kernel_initializer='he_normal',activation='relu',input_dim = 11))

In [50]:
# Adding the output layer
classifier2.add(Dense(units = 1, kernel_initializer = 'glorot_uniform', activation = 'sigmoid'))

In [51]:
#to get summary about your nueral network
classifier2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 10)                120       
                                                                 
 dense_4 (Dense)             (None, 20)                220       
                                                                 
 dense_5 (Dense)             (None, 15)                315       
                                                                 
 dense_6 (Dense)             (None, 1)                 16        
                                                                 
Total params: 671
Trainable params: 671
Non-trainable params: 0
_________________________________________________________________


In [52]:
# Compiling the ANN
classifier2.compile(optimizer = 'Adamax', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [53]:
# Fitting the ANN to the Training set
model_history=classifier2.fit(x_train, y_train,validation_split=0.33, batch_size = 10, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [60]:
y_new_pred = classifier2.predict(x_test)



In [62]:
y_new_pred

array([[0.0299222 ],
       [0.02599549],
       [0.06263547],
       ...,
       [0.0852984 ],
       [0.01223585],
       [0.04570496]], dtype=float32)

In [63]:
y_new_pred = (y_new_pred > 0.5)

In [64]:
y_new_pred

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [67]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_new_pred)
cm

array([[2282,  121],
       [ 330,  267]], dtype=int64)

In [68]:
# Calculate the Accuracy
from sklearn.metrics import accuracy_score
score=accuracy_score(y_pred_new,y_test)
score

0.8496666666666667

#### Note :
* Almost we got same accuracy as we got before.
* but dont increse the no of hidden layers it may cause to over fitting problem

# By using dropout :
* Dropout is regulization parameter that we can use when we have nn quite deep}

In [69]:
# Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LeakyReLU,PReLU,ELU
from keras.layers import Dropout

In [70]:
# Initialising the ANN
# this is an empty nueral network
classifier = Sequential()

In [71]:
# Adding the input layer and the first hidden layer
#added newly is units = 10 and kernel_initializer='he_normal'
classifier.add(Dense(units = 10,kernel_initializer='he_normal',activation='relu',input_dim = 11))
classifier.add(Dropout(0.3))

In [72]:
# Adding the 2rd hidden layer
#added newly is units = 20 and kernel_initializer='he_normal'
classifier.add(Dense(units = 20,kernel_initializer='he_normal',activation='relu',input_dim = 11))
classifier.add(Dropout(0.4))

In [73]:
# Adding the 2rd hidden layer
#added newly is units = 15 and kernel_initializer='he_normal'
classifier.add(Dense(units = 15,kernel_initializer='he_normal',activation='relu',input_dim = 11))
classifier.add(Dropout(0.2))

In [75]:
# Adding the output layer
classifier.add(Dense(units = 1, kernel_initializer = 'glorot_uniform', activation = 'sigmoid'))

In [76]:
#to get summary about your nueral network
classifier.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_7 (Dense)             (None, 10)                120       
                                                                 
 dropout (Dropout)           (None, 10)                0         
                                                                 
 dense_8 (Dense)             (None, 20)                220       
                                                                 
 dropout_1 (Dropout)         (None, 20)                0         
                                                                 
 dense_9 (Dense)             (None, 15)                315       
                                                                 
 dropout_2 (Dropout)         (None, 15)                0         
                                                                 
 dense_10 (Dense)            (None, 1)                

In [77]:
# Compiling the ANN
classifier.compile(optimizer = 'Adamax', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [78]:
# Fitting the ANN to the Training set
model_history=classifier.fit(x_train, y_train,validation_split=0.33, batch_size = 10, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [79]:
# Predicting the Test set results
y_pred = classifier.predict(x_test)
y_pred = (y_pred > 0.5)



In [80]:
y_pred

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [83]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[2381,   22],
       [ 482,  115]], dtype=int64)

In [84]:
# Calculate the Accuracy
from sklearn.metrics import accuracy_score
score=accuracy_score(y_pred_new,y_test)
score

0.8496666666666667

#### Note :
* We mostly use dropout when we have more deep in neural network>

#### Note :
* Here we have taken hidden layers as randmoly 
* but we use some <b>techniques hyper parametimization</b> optimizer technique to get correct number of layers.