# Artificial Neural Network

### Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Show the installed TensorFlow version so the run can be reproduced later.
tf.__version__

'2.5.0'

## Part 1 - Data Preprocessing

### Importing the dataset

In [4]:
# Load the churn data from the CSV file in the working directory.
dataset = pd.read_csv('Churn_Modelling.csv')
# Features: every column from index 3 up to, but NOT including, the last one.
# Columns 0-2 (RowNumber, CustomerId, Surname) are skipped on purpose.
X = dataset.iloc[:, 3:-1].values
# Target: the last column (Exited).
y = dataset.iloc[:, -1].values

In [6]:
# Preview the loaded frame (the notebook display elides the middle rows).
dataset

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [7]:
# Inspect the target vector taken from the last dataset column (Exited).
y

array([1, 0, 1, ..., 1, 1, 0])

In [4]:
# Raw feature matrix: Geography and Gender are still strings at this point.
print(X)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]


In [5]:
# Target labels as a plain 0/1 array.
print(y)

[1 0 1 ... 1 1 0]


### Encoding categorical data

We have two categorical data columns: gender and geography

Label Encoding the "Gender" column

In [10]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
# Column 2 of X is Gender; replace its string labels with integer codes in place
# (in the printed result, Female rows show 0 and Male rows show 1).
X[:, 2] = le.fit_transform(X[:, 2])

In [11]:
# Gender (column 2) is now numeric; Geography (column 1) is still a string.
print(X)

[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]


One Hot Encoding the "Geography" column

In [12]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
# One-hot encode column 1 (Geography); remainder='passthrough' keeps all other
# columns unchanged.
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
# NOTE: this cell overwrites X. Re-running it on its own would one-hot encode
# whatever sits in column 1 of the already-encoded matrix, so re-run from the
# data-loading cell instead.
X = np.array(ct.fit_transform(X))

In [46]:
# X after one-hot encoding: the three Geography dummy columns come first.
X

array([[1.0, 0.0, 0.0, ..., 1, 1, 101348.88],
       [0.0, 0.0, 1.0, ..., 0, 1, 112542.58],
       [1.0, 0.0, 0.0, ..., 1, 0, 113931.57],
       ...,
       [1.0, 0.0, 0.0, ..., 0, 1, 42085.58],
       [0.0, 1.0, 0.0, ..., 1, 0, 92888.52],
       [1.0, 0.0, 0.0, ..., 1, 0, 38190.78]], dtype=object)

In [13]:
# Same encoded matrix via print(): the dummy columns lead, the other features follow.
print(X)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


### Splitting the dataset into the Training set and Test set

In [14]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as the test set; random_state = 0 makes the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

### Feature Scaling

In [15]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Learn the scaling parameters from the training set only (fit), then apply them.
# NOTE: X_train is overwritten, so re-running this cell alone would refit the
# scaler on already-scaled data; re-run from the split cell instead.
X_train = sc.fit_transform(X_train)
# Scale the test set with the parameters learned from the training set (no refit).
X_test = sc.transform(X_test)

In [16]:
# NOTE(review): X_train was already standardized in the previous cell, so this
# applies the scaler a second time. X_train2 is only displayed below for
# comparison and is not used anywhere else in the visible notebook.
X_train2 = sc.transform(X_train)

In [17]:
# Standardized training features produced by the fitted scaler.
X_train

array([[-1.01460667, -0.5698444 ,  1.74309049, ...,  0.64259497,
        -1.03227043,  1.10643166],
       [-1.01460667,  1.75486502, -0.57369368, ...,  0.64259497,
         0.9687384 , -0.74866447],
       [ 0.98560362, -0.5698444 , -0.57369368, ...,  0.64259497,
        -1.03227043,  1.48533467],
       ...,
       [ 0.98560362, -0.5698444 , -0.57369368, ...,  0.64259497,
        -1.03227043,  1.41231994],
       [-1.01460667, -0.5698444 ,  1.74309049, ...,  0.64259497,
         0.9687384 ,  0.84432121],
       [-1.01460667,  1.75486502, -0.57369368, ...,  0.64259497,
        -1.03227043,  0.32472465]])

In [18]:
# Result of scaling the already-scaled X_train again; compare with X_train above.
X_train2

array([[-3.04403335, -1.89456703,  3.46467078, ..., -0.14326192,
        -3.09785266, -1.7410528 ],
       [-3.04403335,  3.50970685, -1.90281812, ..., -0.14326192,
         0.90618366, -1.74108504],
       [ 0.95680782, -1.89456703, -1.90281812, ..., -0.14326192,
        -3.09785266, -1.74104621],
       ...,
       [ 0.95680782, -1.89456703, -1.90281812, ..., -0.14326192,
        -3.09785266, -1.74104748],
       [-3.04403335, -1.89456703,  3.46467078, ..., -0.14326192,
         0.90618366, -1.74105735],
       [-3.04403335,  3.50970685, -1.90281812, ..., -0.14326192,
        -3.09785266, -1.74106638]])

## Part 2 - Building the ANN

### Initializing the ANN

In [21]:
# Fix TensorFlow's global random seed before any layer is created so that
# weight initialisation (and other TensorFlow random ops) are repeatable under
# "Restart Kernel -> Run All". Results may still differ slightly across
# hardware or TensorFlow versions.
tf.random.set_seed(0)
# Sequential: the model is a plain sequence of layers (as opposed to an
# arbitrary computational graph).
ann = tf.keras.models.Sequential()

### Adding the input layer and the first hidden layer

In [22]:
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))
# Dense is a fully connected layer.
# units=6 gives this hidden layer 6 neurons. The number is simply picked by
# experimentation; there is no rule of thumb.
# activation='relu' selects the rectifier function, used here for the hidden layers.

### Adding the second hidden layer

In [23]:
# Second hidden layer, configured like the first: 6 units with ReLU activation.
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

### Adding the output layer

In [24]:
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
# One output neuron, because the target is a single binary variable.
# With three classes we would put units=3 here instead.
# Sigmoid is used because the target is 0/1: the output is then the predicted
# probability that the customer leaves the bank.

## Part 3 - Training the ANN

### Compiling the ANN

In [25]:
ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# optimizer: 'adam', a stochastic-gradient-descent variant that generally works well.
# loss: binary cross-entropy, matching the single 0/1 output
#   (for a categorical target use 'categorical_crossentropy' instead).
# metrics: report accuracy while training and evaluating.

### Training the ANN on the Training set

In [26]:
ann.fit(X_train, y_train, batch_size = 32, epochs = 100)
# batch_size: number of samples per gradient update. Mini-batches are more
#   efficient than single samples without processing the whole training set at
#   once; 32 is the Keras default.
# epochs: how many passes over the training set to run.

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x7fb3f14d3220>

Accuracy converges at about 0.86 during training. Really good!

## Part 4 - Making the predictions and evaluating the model

### Predicting the result of a single observation

**Homework**

Use our ANN model to predict whether the customer with the following information will leave the bank:

Geography: France

Credit Score: 600

Gender: Male

Age: 40 years old

Tenure: 3 years

Balance: \$ 60000

Number of Products: 2

Does this customer have a credit card ? Yes

Is this customer an Active Member: Yes

Estimated Salary: \$ 50000

So, should we say goodbye to that customer ?

**Solution**

In [45]:
# List the transformed column names to confirm the column order the model
# expects (Geography dummies first, then the passthrough columns).
# ColumnTransformer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2, so prefer get_feature_names_out() when it exists and fall
# back to the old method on older installs.
if hasattr(ct, 'get_feature_names_out'):
    feature_names = ct.get_feature_names_out()
else:
    feature_names = ct.get_feature_names()
feature_names

['encoder__x0_France',
 'encoder__x0_Germany',
 'encoder__x0_Spain',
 'x0',
 'x2',
 'x3',
 'x4',
 'x5',
 'x6',
 'x7',
 'x8',
 'x9']

In [18]:
# One customer, in the column order of the encoded matrix:
# France/Germany/Spain dummies, CreditScore, Gender (1 = Male), Age, Tenure,
# Balance, NumOfProducts, HasCrCard, IsActiveMember, EstimatedSalary.
new_customer = [[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]]
# Scale with the already-fitted scaler, then threshold the predicted probability at 0.5.
churn_probability = ann.predict(sc.transform(new_customer))
print(churn_probability > 0.5)

[[False]]


In [48]:
# Same customer as in the previous cell, but print the raw predicted
# probability instead of the thresholded True/False answer.
customer_row = [[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]]
customer_scaled = sc.transform(customer_row)
print(ann.predict(customer_scaled))

[[0.03455031]]


In [None]:
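# The first three dataset columns (RowNumber, CustomerId, Surname) were dropped when X was built, so they are not part of the input.
# France is entered as 1, 0, 0 because of the one-hot encoding; the remaining values are the other original variables.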

Therefore, our ANN model predicts that this customer stays in the bank!

**Important note 1:** Notice that the values of the features were all input in a double pair of square brackets. That's because the "predict" method always expects a 2D array as the format of its inputs. And putting our values into a double pair of square brackets makes the input exactly a 2D array.

**Important note 2:** Notice also that the country "France" was not input as a string but as "1, 0, 0" in the first three columns. That's because the predict method expects the one-hot-encoded values of the country, and as we see in the first row of the matrix of features X, "France" was encoded as "1, 0, 0". Be careful to put these values in the first three columns, because the dummy variables are always created in the first columns.

### Predicting the Test set results

In [47]:
# Predicted probabilities for the (already scaled) test set, one row per customer.
y_pred = ann.predict(X_test)
y_pred

array([[0.18085283],
       [0.31042144],
       [0.18766919],
       ...,
       [0.15329051],
       [0.08194107],
       [0.12040085]], dtype=float32)

In [19]:
# Threshold the predicted probabilities at 0.5 to obtain hard True/False labels.
y_pred = ann.predict(X_test) > 0.5
# Print predictions (left column) side by side with the true labels (right column).
print(np.hstack((y_pred.reshape(-1, 1), y_test.reshape(-1, 1))))

[[0 0]
 [0 1]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


### Making the Confusion Matrix

In [20]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Confusion matrix of true labels (rows) against predicted labels (columns).
cm = confusion_matrix(y_test, y_pred)
print(cm)
# Fraction of test rows classified correctly; shown as the cell's output.
accuracy_score(y_test, y_pred)

[[1516   79]
 [ 200  205]]


0.8605

In [49]:
# Print each layer's output shape and parameter count.
ann.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 6)                 78        
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 42        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 7         
Total params: 127
Trainable params: 127
Non-trainable params: 0
_________________________________________________________________


In [50]:
# Inspect the learned weights (kernels and biases) of every layer.
ann.trainable_variables

[<tf.Variable 'dense/kernel:0' shape=(12, 6) dtype=float32, numpy=
 array([[ 6.59068763e-01, -6.55071735e-01,  4.31401670e-01,
         -1.45131260e-01,  5.48640311e-01,  7.50341490e-02],
        [-2.65486717e-01,  9.65547785e-02,  4.77680266e-01,
         -3.85613702e-02,  3.29364955e-01,  1.34773165e-01],
        [-8.81564319e-01,  3.51801038e-01, -1.27997565e+00,
         -1.55644953e-01, -7.09045231e-01,  7.37781882e-01],
        [ 2.21152212e-02, -6.83940798e-02,  2.87704058e-02,
         -5.73490653e-03, -3.09614968e-02,  7.56908730e-02],
        [ 1.87857658e-01,  1.09528258e-01,  6.81089833e-02,
          5.18915243e-02,  3.23193558e-02,  1.14819884e-01],
        [ 3.72018099e-01,  3.75084221e-01, -5.23972511e-01,
         -7.39507154e-02, -8.96981061e-01, -1.18749285e+00],
        [ 1.46864742e-01,  1.04883565e-02, -3.13816704e-02,
          1.86636746e-02,  5.62307565e-03,  9.79428291e-02],
        [-2.54290074e-01, -2.00693712e-01, -1.03847019e-01,
         -3.35616380e-01, 