In [155]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
#import matplotlib.pyplot as plt
import tensorflow as tf

### Load the data

In [156]:
dataset = pd.read_csv('../Machine Learning A-Z (Codes and Datasets)/Part 8 - Deep Learning/Section 39 - Artificial Neural Networks (ANN)/Python/Churn_Modelling.csv')

### Define variables

In [157]:
x = dataset.iloc[:,3:-1].values
y = dataset.iloc[:,-1].values

In [158]:
print(x)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]


In [159]:
print(y)

[1 0 1 ... 1 1 0]


### Change the categorical variables into dummy variables(encode)

In [160]:
df = pd.DataFrame(data=x[:,2], columns=['Gender'])
df

Unnamed: 0,Gender
0,Female
1,Female
2,Female
3,Female
4,Female
...,...
9995,Male
9996,Male
9997,Female
9998,Male


In [161]:
df['Gender'] = df['Gender'].map({'Female':1, 'Male':0})
df

Unnamed: 0,Gender
0,1
1,1
2,1
3,1
4,1
...,...
9995,0
9996,0
9997,1
9998,0


In [162]:
x[:,2] = df['Gender']
x

array([[619, 'France', 1, ..., 1, 1, 101348.88],
       [608, 'Spain', 1, ..., 0, 1, 112542.58],
       [502, 'France', 1, ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 1, ..., 0, 1, 42085.58],
       [772, 'Germany', 0, ..., 1, 0, 92888.52],
       [792, 'France', 1, ..., 1, 0, 38190.78]], dtype=object)

In [163]:
#You can use Labelencoder or .map

#from sklearn.preprocessing import LabelEncoder
#le = LabelEncoder()
#x[:,2] = le.fit_transform(x[:,2])
#print(x)

In [164]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
x = np.array(ct.fit_transform(x))
print(x)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


### Split the data set into Training and Test

In [165]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state=0)

In [166]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.fit_transform(x_test)

### Create the layers

In [195]:
output_size = 1
hidden_layer_size = 50
input_size = 6

#hyperparameters are based on experience

#after TF2, keras integreted in TF

#you can use which one you want to use

#'relu' -> rectifier activation function
model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(output_size, activation='sigmoid')
])

#or

ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=100, activation='relu'))
ann.add(tf.keras.layers.Dense(units=100, activation='relu'))

#if we would have one hot or binary, we would choose differently of the units of output layer 
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
#when doing non-binary classification, when predicting more than two categories, activation of output -> softmax

### Compiling

In [196]:
#for binary classification, loss -> binary_crossentropy
#for non-binary classification, loss -> categorical_crossentropy
ann.compile(optimizer='adam', loss='binary_crossentropy', metrics  = ['accuracy'])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics  = ['accuracy'])

### Train the ann on the training set

In [197]:
batch_size = 10
epochs = 40

early_stopping = tf.keras.callbacks.EarlyStopping()

ann.fit(x_train, y_train, batch_size = batch_size, epochs = epochs, verbose=2)
#model.fit(x_train, y_train, batch_size = batch_size, epochs = epochs, verbose=2)

Epoch 1/40
800/800 - 1s - loss: 0.4047 - accuracy: 0.8300 - 965ms/epoch - 1ms/step
Epoch 2/40
800/800 - 1s - loss: 0.3538 - accuracy: 0.8547 - 645ms/epoch - 807us/step
Epoch 3/40
800/800 - 1s - loss: 0.3422 - accuracy: 0.8569 - 705ms/epoch - 881us/step
Epoch 4/40
800/800 - 1s - loss: 0.3381 - accuracy: 0.8602 - 703ms/epoch - 879us/step
Epoch 5/40
800/800 - 1s - loss: 0.3322 - accuracy: 0.8605 - 687ms/epoch - 859us/step
Epoch 6/40
800/800 - 1s - loss: 0.3279 - accuracy: 0.8656 - 666ms/epoch - 833us/step
Epoch 7/40
800/800 - 1s - loss: 0.3266 - accuracy: 0.8664 - 753ms/epoch - 942us/step
Epoch 8/40
800/800 - 1s - loss: 0.3238 - accuracy: 0.8680 - 648ms/epoch - 810us/step
Epoch 9/40
800/800 - 1s - loss: 0.3204 - accuracy: 0.8665 - 642ms/epoch - 802us/step
Epoch 10/40
800/800 - 1s - loss: 0.3180 - accuracy: 0.8691 - 654ms/epoch - 818us/step
Epoch 11/40
800/800 - 1s - loss: 0.3132 - accuracy: 0.8686 - 653ms/epoch - 816us/step
Epoch 12/40
800/800 - 1s - loss: 0.3111 - accuracy: 0.8705 - 648m

<keras.callbacks.History at 0x1bd0b1b82e0>

### Predicting the result of a single observation

In [198]:
#predict method waits 2D array therefore [[]]
#we need to scale the observation too
print(ann.predict(sc.transform([[1,0,0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])))
print(ann.predict(sc.transform([[1,0,0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])) > 0.5)
#False -> not leaving the bank

[[0.01139096]]
[[False]]


### Predicting the Test set results

In [199]:
y_pred = ann.predict(x_test)
y_pred = (y_pred > 0.5)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)), 1))

[[0 0]
 [0 1]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


### Making the Confusion Matrix

In [200]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[1445  150]
 [ 171  234]]


0.8395