In [1]:
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import (
    LabelEncoder, OneHotEncoder, StandardScaler
)

Using TensorFlow backend.


In [14]:
# Load the churn data. Columns 3..12 are used as the feature matrix and the
# last column as the target.
dataset = pd.read_csv('datasets/churn.csv')
feature_columns = list(range(3, 13))
X = dataset.iloc[:, feature_columns].values
y = dataset.iloc[:, -1].values

In [16]:
# The country (column 1) and gender (column 2) features are categorical, so
# they have to be turned into numbers before the network can use them.

# This cell rewrites X in place and then replaces it with an 11-column array.
# Running it a second time would silently label-encode the wrong columns, so
# refuse to continue unless X still has the 10 raw feature columns.
if X.shape[1] != 10:
    raise ValueError(
        "X does not have the 10 raw feature columns; reload the dataset "
        "before re-running the encoding cell"
    )

country_label_encoder = LabelEncoder()
X[:, 1] = country_label_encoder.fit_transform(X[:, 1])

gender_label_encoder = LabelEncoder()
X[:, 2] = gender_label_encoder.fit_transform(X[:, 2])

# One hot encode the only categorical feature with more than 2 values (the
# country in column 1). The `categorical_features` argument was removed from
# OneHotEncoder in scikit-learn 0.22, so encode just that column and stitch
# the result back together by hand. The layout matches what the old argument
# produced: the one hot columns first, then the remaining features in their
# original order, all as floats.
# NOTE: `one_hot_encoder` is therefore fitted on the country column only.
one_hot_encoder = OneHotEncoder()
country_one_hot = one_hot_encoder.fit_transform(X[:, [1]].astype(int)).toarray()
other_features = np.delete(X, 1, axis=1).astype(float)
X = np.hstack([country_one_hot, other_features])

# to avoid the dummy variable trap we remove one variable
X = X[:, 1:]

In [4]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [5]:
# Bring every feature onto the same scale. The scaler is fitted on the
# training set only and then applied unchanged to both sets.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
# Build a small feed-forward classifier: two hidden layers and one output layer.
#
# - Input
#    * The first Dense layer declares `input_dim=11`, i.e. it accepts the 11 features of our feature set.
# - Hidden layer 1
#    * `relu` activation, the most popular neuron activation function.
#    * Choosing the number of neurons is more art than science. A good starting point is the average of
#      the input and output dimensions: (11 + 1) / 2 = 6 neurons.
#    * `uniform` initialisation starts the weights at random numbers close to zero.
# - Hidden layer 2
#    * Consumes the output of hidden layer 1 with the same initialiser and activation. The extra layer
#      is meant to help the model generalise; we have to be careful of overfitting though.
# - Output layer
#    * We want a single binary outcome, so the layer has one unit with a `sigmoid` activation. That
#      yields a value between 0 and 1 which can be read as a probability.
classifier = Sequential()
classifier.add(Dense(units=6, activation="relu", kernel_initializer="uniform", input_dim=11))
classifier.add(Dense(units=6, activation="relu", kernel_initializer="uniform"))
classifier.add(Dense(units=1, activation="sigmoid", kernel_initializer="uniform"))

# Finish the setup by compiling the model with the optimizer, the loss function and the metric to report.
# - `adam` is a stochastic gradient-based optimizer: the weights are adjusted after every batch of rows.
# - `binary_crossentropy` measures the distance between the predicted probability and the actual label;
#   that cost drives the weight updates.
# - `accuracy` is the metric tracked while training.
classifier.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [7]:
# Train the network on the training data.
# The weights are updated after every batch of 10 rows, and the whole training set is passed through
# the network 100 times (epochs).
classifier.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
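Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100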
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x1184d98d0>

In [8]:
# Check the confusion matrix to see how well the trained network does on the test set.
# The network outputs a probability per row; anything above 0.5 counts as a positive prediction.
y_probability = classifier.predict(X_test)
y_prediction = y_probability > 0.5
confusion_matrix(y_test, y_prediction)

array([[1545,   50],
       [ 265,  140]])