In [None]:
# Importing the libraries we will need 
import pandas as pd
import numpy as np
import tensorflow as tf

In [None]:
# Display the installed TensorFlow version so the environment this notebook ran in is recorded.
tf.__version__

'2.3.0'

In [None]:
# Load the churn dataset from the CSV file into a DataFrame.
data = pd.read_csv("Churn_Modelling.csv")

# Feature matrix X: every row, and every column from position 3 up to
# (but not including) the last column.
feature_columns = data.iloc[:, 3:-1]
X = feature_columns.values

# Dependent variable y (the value we want to predict): every row of the last column.
target_column = data.iloc[:, -1]
y = target_column.values

In [None]:
# Show the feature matrix followed by the target vector, one after the other.
print(X, y, sep="\n")

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]
[1 0 1 ... 1 1 0]


In [None]:
# X contains categorical text columns (country and gender) that must be turned
# into numbers before training. This cell handles gender, at column index 2 of X.
from sklearn.preprocessing import LabelEncoder

# Replace each gender label with an integer code, written back into X in place.
# In the recorded output below, 'Female' becomes 0 and 'Male' becomes 1.
le = LabelEncoder()
gender_codes = le.fit_transform(X[:, 2])
X[:, 2] = gender_codes
print(X)

[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]


In [None]:
# The country column (index 1 of X) has no natural order, so replacing it with
# integer codes could mislead the model into seeing a ranking that is not there.
# One-hot encoding avoids this: the single column is replaced by one 0/1 indicator
# column per distinct category, and exactly one of them is 1 in each row.
# In the recorded output below this yields three leading indicator columns
# (rows labelled France -> 1 0 0, Germany -> 0 1 0, Spain -> 0 0 1).
# NOTE: X is overwritten, so this cell should be run once per freshly loaded X;
# re-running it would encode whatever column then sits at index 1.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Encode only column 1; every other column is passed through unchanged.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [1])],
    remainder='passthrough',
)
country_encoded = ct.fit_transform(X)
X = np.array(country_encoded)
print(X)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable from run to run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Standardise the features (zero mean, unit variance per column).
# The scaler is fitted on the training set ONLY; the test set is then transformed
# with those same training statistics. Re-fitting on the test set would scale the
# two sets differently and would leave `sc` holding test-set statistics for any
# later sc.transform(...) call on new data.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Start from an empty Sequential model: layers are stacked one after another,
# from the input side through to the output.
# Keras ships inside TensorFlow, so it is reached through the tf namespace.
ann = tf.keras.Sequential()

In [None]:
# Add two identical fully connected (Dense) hidden layers, each with 6 units and
# ReLU activation. The first one also serves as the layer that receives the input.
for hidden_idx in range(2):
    ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

In [None]:
# Output layer: same Dense class, but with a single unit because the target is binary.
# Sigmoid is used instead of ReLU so the output can be read as the probability
# that the customer leaves the bank.
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
ann.add(output_layer)

In [None]:
# Compile the network by choosing three things:
#   optimizer - 'adam', a stochastic-gradient-descent-based optimizer
#   loss      - 'binary_crossentropy', matching the binary target
#               (for more than two classes one would use categorical_crossentropy
#               together with a softmax output layer)
#   metrics   - accuracy is reported during training
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the network on the training set.
# batch_size: the weights are updated after each batch of rows rather than after
#             every single row; 32 is the conventional starting value and can be
#             tuned like any other hyperparameter.
# epochs:     number of complete passes over the training data; too few passes
#             leaves the model under-trained.
ann.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f9c7b914be0>

In [None]:
# Predict churn for one hypothetical customer.
# predict() expects a 2-D input (rows x features), hence the double square brackets
# around the single row. The values must be in the same column order as X after
# encoding: the first three entries are the one-hot country indicators (1, 0, 0 is
# the pattern shown for 'France' rows in the earlier output), and the fifth entry is
# the label-encoded gender.
# The row is scaled with the already-fitted scaler `sc` before being fed to the
# network, because the network was trained on scaled features.
new_customer = [[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]]

# Scale and predict once, then reuse the result instead of re-running the model
# for every print.
churn_probability = ann.predict(sc.transform(new_customer))

print(churn_probability)        # predicted probability that the customer leaves
print(churn_probability > 0.5)  # True if the probability exceeds the 0.5 threshold


[[0.0208101]]
[[False]]


In [None]:
# Evaluate on the held-out test set: predict a probability for every row and
# turn it into a boolean (True when the probability is greater than 0.5).
y_pred = ann.predict(X_test) > 0.5

# Print predictions (left column) next to the actual labels (right column).
predicted_col = y_pred.reshape(-1, 1)
actual_col = y_test.reshape(-1, 1)
print(np.hstack((predicted_col, actual_col)))

[[0 0]
 [0 1]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


In [None]:
# Summarise test-set performance with a confusion matrix (counts of TN, FP, FN, TP)
# and the overall accuracy.
from sklearn.metrics import confusion_matrix, accuracy_score

# scikit-learn lays the matrix out with actual classes as rows and predicted
# classes as columns, i.e. [[TN, FP], [FN, TP]] for a binary target.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Fraction of test rows classified correctly (displayed as the cell's output).
accuracy_score(y_true=y_test, y_pred=y_pred)

[[1526   69]
 [ 204  201]]


0.8635