In [1]:
import numpy as np
import pandas as pd

import tensorflow as tf

# STEP 1
Read the dataset

In [3]:
"""READ FROM LOCAL FILE (only in Anaconda, not for Colab)"""
# df = pd.read_csv("Churn.csv")

"""READ FROM LOCAL FILE (only in Colab)"""
# from google.colab import files
# df = files.upload()

"""READ FILE SAVED ON GOOGLE DRIVE (only in Colab)"""
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# The value for link below is derived by
# Go to Google Drive > Find the file to be read > Right click > Select 'Get Shareable link' > Paste below
link = "https://drive.google.com/open?id=1uiIzOwC8FEbDa7C8YHwepq373O1ac1Yl"
fluff, id = link.split('=')
print ("File id in colab: ", id) # Verify that you have everything after '='
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('Filename.csv')     # The file name provided here is not important. Any value will work. Probably used for internal storing.
df = pd.read_csv('Filename.csv')

File id in colab:  1uiIzOwC8FEbDa7C8YHwepq373O1ac1Yl


In [4]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


# STEP 2
Drop the columns which are unique for all users like IDs (2.5 points)

In [5]:
print(df.shape)

# Find columns with unique values in each row
df.nunique()

(10000, 14)


RowNumber          10000
CustomerId         10000
Surname             2932
CreditScore          460
Geography              3
Gender                 2
Age                   70
Tenure                11
Balance             6382
NumOfProducts          4
HasCrCard              2
IsActiveMember         2
EstimatedSalary     9999
Exited                 2
dtype: int64

In [0]:
def apply_d_l(df_in):
    # apply dummies
    out = pd.get_dummies(df_in, columns=["Geography"])
    out = pd.get_dummies(out, columns=["Gender"])

    # applyabel encoding
    #out['Family'] = out['Family'].replace({"???": 0, '???': 1, '???': 2, '???': 3})
    
    return out

In [9]:
# Drop columns with unique values in each row
df.drop(["RowNumber", "CustomerId"], axis=1, inplace=True)

"""
Drop "Surname"
"""
df.drop("Surname", axis=1, inplace=True)

"""
Apply one hot encoding (dummification) on Geography, Gender
"""
df = apply_d_l(df)

df.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male
0,619,42,2,0.0,1,1,1,101348.88,1,1,0,0,1,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,0,1,1,0
2,502,42,8,159660.8,3,1,0,113931.57,1,1,0,0,1,0
3,699,39,1,0.0,2,0,0,93826.63,0,1,0,0,1,0
4,850,43,2,125510.82,1,1,1,79084.1,0,0,0,1,1,0


# STEP 3
Distinguish the feature and target set (2.5 points)

In [0]:
# Feature set
X = df.drop("Exited", axis=1)

# Target set
Y = df.Exited

# STEP 4
Divide the data set into train and test sets

In [0]:
from sklearn.model_selection import train_test_split

xTrain, xTest, yTrain, yTest = train_test_split(X, Y, test_size=0.30, random_state=0)
# trainSet, testSet = train_test_split(X, Y, test_size=0.30, random_state=0)

# STEP 5
Normalize the train and test data (2.5 points)

In [12]:
from sklearn import preprocessing

min_max_scaler = preprocessing.MinMaxScaler()

xTrain = min_max_scaler.fit_transform(xTrain)
xTest = min_max_scaler.fit_transform(xTest)
yTrain = tf.keras.utils.to_categorical(yTrain, num_classes=2)
yTest = tf.keras.utils.to_categorical(yTest, num_classes=2)

print(xTrain.shape)
print(xTest.shape)
print(type(xTrain))
print(type(yTrain))
print(yTrain.shape)
print(yTest.shape)

(7000, 13)
(3000, 13)
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(7000, 2)
(3000, 2)


# STEP 6
Initialize & build the model (10 points)

In [0]:
from tensorflow.keras.layers import LeakyReLU

#Clear out tensorflow memory
tf.keras.backend.clear_session()

#Initialize Sequential model
model = tf.keras.models.Sequential()

In [0]:
"""
Add input layer
"""
# model.add(tf.keras.layers.Dense(13, activation=activiationMethod,input_shape=(13,)))
model.add(tf.keras.layers.Reshape((13,)))


"""
Add Hidden Layers
"""
# Use relu methods instead of sigmoid
# activiationMethod = 'relu'
# activiationMethod = 'tanh'
activiationMethod = LeakyReLU(alpha=0.3)
# leakyReLU = LeakyReLU(alpha=0.3)
# activiationMethod = 'sigmoid'

# model.add(tf.keras.layers.BatchNormalization())
# Add hidden layers
model.add(tf.keras.layers.Dense(200, activation=activiationMethod))
# model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(100, activation=activiationMethod))
# model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(60, activation=activiationMethod))
# model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(30, activation=activiationMethod))
# model.add(tf.keras.layers.BatchNormalization())


"""
Add OUTPUT layer
"""
model.add(tf.keras.layers.Dense(2, activation='softmax'))

In [0]:
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.015)
# optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, nesterov=True)
optimizer = tf.keras.optimizers.SGD()
# optimizer = tf.keras.optimizers.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [16]:
print(xTrain.shape)
print(xTest.shape)
# print(type(xTrain_scaled))
# print(type(yTrain))
print(yTrain.shape)
print(yTest.shape)

model.fit(xTrain,yTrain,          
          validation_data=(xTest,yTest),
          epochs=2,
          batch_size=32,
          verbose=1)

(7000, 13)
(3000, 13)
(7000, 2)
(3000, 2)
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Train on 7000 samples, validate on 3000 samples
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7f54338770b8>

In [17]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            multiple                  0         
_________________________________________________________________
dense (Dense)                multiple                  2800      
_________________________________________________________________
dense_1 (Dense)              multiple                  20100     
_________________________________________________________________
dense_2 (Dense)              multiple                  6060      
_________________________________________________________________
dense_3 (Dense)              multiple                  1830      
_________________________________________________________________
dense_4 (Dense)              multiple                  62        
Total params: 30,852
Trainable params: 30,852
Non-trainable params: 0
____________________________________________________

# STEP 7
Optimize the model (5 points)

In [0]:
# Optimize using
    # Weight initialization (Normal or Uniform)
        # Xavier or Glorot normal distribution [underroot{2/(size of previous layer + size of this layer)}]
        # Xavier or Glorot uniform distribution [underroot{6/(size of previous layer + size of this layer)}]
        # he initialization normal distribution [underroot{2/size of previous layer}]
        # he initialization uniform distribution [underroot{6/size of previous layer}]
        # Preferred combination: he initialization with ReLU
    # Activiation function
        # Sigmoid function
        # tanh
        # Rectified Linear Unit (ReLU)
        # Leaky ReLU
    # Dropout (Stop model from memorizing/overfitting)
    # Batch normalization (can be used with or inplace of dropout)
        # Usually applied to output of hidden layers
    # Learning rate
    # Learning rate decay
    # Optimizers (momentum)
        # SGD with Momentum
        # SGD with Nesterov momentum
        # Adagrad (adapts learning rate of each weight)
        # AdaDelta
        # Adam (Adaptive moment estimation)
    # Number of iterations
    # Batch size
    # Number of hidden layers
    # Number of neurons in each layer

In [19]:
"""Function to test a neural network configuration"""
def checkThisConfiguration(xTrainx, yTrainx, xTestx, yTestx, activiationMethod='relu', optimizerType='adam', 
                    epochs=2, batchSize=32, hiddenLayers=[200, 100, 60, 30], 
                    learningRate=0.01, do_nesterov=True, decayRate=0.1, verbose=0):
    
    print("\n***Checking performance on:")
    print("ActivationMethod: ", activiationMethod, ", Optimizer: ", optimizerType, ", Learning rate: ", learningRate)
    print("Epochs: ", epochs, ", Batch size: ", batchSize)
    print("Hidden layers: ", hiddenLayers)
    print("Nestrove?: ", do_nesterov, ", Decay rate: ", decayRate)
    
    tf.keras.backend.clear_session()
    model_ = tf.keras.models.Sequential()
    
    """INPUT LAYER""", 
    model_.add(tf.keras.layers.Reshape((13,)))
    
    """HIDDEN LAYERS"""
    for layerSize in hiddenLayers:
        if activiationMethod=='LeakyReLU':
            activationMethod = LeakyReLU(alpha=0.3)
            model_.add(tf.keras.layers.Dense(layerSize, activation=LeakyReLU(alpha=0.3)))
        else:
            model_.add(tf.keras.layers.Dense(layerSize, activation=activiationMethod))
        model_.add(tf.keras.layers.BatchNormalization())
    
    """OUTPUT LAYER"""
    model_.add(tf.keras.layers.Dense(2, activation='softmax'))
    
    """OPTIMIZER"""
    optimizerDef = None
    if optimizerType == "adam":
        optimizerDef = tf.keras.optimizers.Adam(learning_rate=learningRate)
    elif optimizerType == "sgd":
        optimizerDef = tf.keras.optimizers.SGD(learning_rate=learningRate, nesterov=do_nesterov, decay=decayRate)
    
    if optimizerDef != None:
        model_.compile(optimizer=optimizerDef, loss="categorical_crossentropy", metrics=['accuracy'])
    else:
        model_.compile(optimizer=optimizerType, loss="categorical_crossentropy", metrics=['accuracy'])
    
    model_hist = model_.fit(xTrainx,yTrainx,          
          validation_data=(xTestx,yTestx),
          epochs=epochs,
          batch_size=batchSize,
          verbose=verbose)
    
    return model_hist

"""Call the function"""
# model1_hist = tuneMyNetwork02(xTrain, yTrain, xTest, yTest, activiationMethod='relu', optimizerType="adam", 
#                               epochs=3, batchSize=30)
# model1_hist.model.summary()

'Call the function'

In [0]:
"""Run desired combinations of hyperparameters"""
def findBestHyperparameters(xTrainx, yTrainx, xTestx, yTestx, 
                            activiationMethods=['sigmoid', 'relu'], 
                            optimizerTypes=['sgd', 'adagrad', 'adadelta', 'adam'], 
                            epochsToTest=[2, 5, 8, 10, 20, 100], batchSizes=[10, 32, 50], 
                            hiddenLayerSets=[[13, 20, 15, 10], [50, 150, 10], [3, 8, 9], [200, 100, 60, 30]], 
                            learningRates=[0.005, 0.01, 0.02, 0.05], 
                            do_nesterov_or_not=[True, False], 
                            decayRates=[0.5, 0.1, 0.2], 
                            verbose_in=0):
    
#     performance_df = pd.DataFrame(columns=["activationMethod", "optimizerType", "epochs","batchSizes", 
#                                            "hiddenLayerNeurons", "learningRate", "do_nesterov", "decayRate", 
#                                            "train_loss", "train_accuracy", "validation_loss", "validation_accuracy"])
    
    activationMethod_array = []
    optimizerType_array = []
    epochs_array = []
    batchSize_array = []
    hiddenLayerNeurons_array = []
    learningRate_array = []
    do_nesterov_array = []
    decayRate_array = []
    train_loss_array = []
    train_accuracy_array = []
    validation_loss_array = []
    validation_accuracy_array = []
    
    # Create possible combinations among the hyperparameters
    for activationMethod_x in activiationMethods:
        for optimizerType_x in optimizerTypes:
            for epochs_x in epochsToTest:
                for batchSize_x in batchSizes:
                    for hiddenLayers_x in hiddenLayerSets:
                        for learningRate_x in learningRates:
                            if optimizerType_x == 'sgd':
                                for do_nesterov_x in do_nesterov_or_not:
                                    for decayRate_x in decayRates:
                                        hist_x = checkThisConfiguration(xTrainx, yTrainx, xTestx, yTestx, 
                                                                        activationMethod_x, optimizerType_x, epochs_x, 
                                                                        batchSize_x, hiddenLayers_x, learningRate_x, 
                                                                        do_nesterov_x, decayRate_x, verbose=verbose_in)
                                        
                                        hist_params = hist_x.history

                                        activationMethod_array.append(activationMethod_x)
                                        optimizerType_array.append(optimizerType_x)
                                        epochs_array.append(epochs_x)
                                        batchSize_array.append(batchSize_x)
                                        hiddenLayerNeurons_array.append(hiddenLayers_x)
                                        learningRate_array.append(learningRate_x)
                                        do_nesterov_array.append(do_nesterov_x)
                                        decayRate_array.append(decayRate_x)
                                        print(hist_params)
                                        train_loss_array.append(hist_params['loss'][epochs_x-1])
                                        validation_loss_array.append(hist_params['val_loss'][epochs_x-1])
                                        # train_accuracy_array.append(hist_params['accuracy'][epochs_x-1])              # On anaconda
                                        # validation_accuracy_array.append(hist_params['val_accuracy'][epochs_x-1])     # On anaconda
                                        train_accuracy_array.append(hist_params['acc'][epochs_x-1])              # On colab
                                        validation_accuracy_array.append(hist_params['val_acc'][epochs_x-1])     # On colab

                            else:
                                do_nesterov_x = False
                                decayRate_x = 0
                                hist_x = checkThisConfiguration(xTrainx, yTrainx, xTestx, yTestx, 
                                                                activationMethod_x, optimizerType_x, epochs_x, 
                                                                batchSize_x, hiddenLayers_x, learningRate_x, 
                                                                verbose=verbose_in)
                                
                                hist_params = hist_x.history

                                activationMethod_array.append(activationMethod_x)
                                optimizerType_array.append(optimizerType_x)
                                epochs_array.append(epochs_x)
                                batchSize_array.append(batchSize_x)
                                hiddenLayerNeurons_array.append(hiddenLayers_x)
                                learningRate_array.append(learningRate_x)
                                do_nesterov_array.append(do_nesterov_x)
                                decayRate_array.append(decayRate_x)
                                print(hist_params)
                                train_loss_array.append(hist_params['loss'][epochs_x-1])
                                validation_loss_array.append(hist_params['val_loss'][epochs_x-1])
                                # train_accuracy_array.append(hist_params['accuracy'][epochs_x-1])              # On anaconda
                                # validation_accuracy_array.append(hist_params['val_accuracy'][epochs_x-1])     # On anaconda
                                train_accuracy_array.append(hist_params['acc'][epochs_x-1])              # On colab
                                validation_accuracy_array.append(hist_params['val_acc'][epochs_x-1])     # On colab
    
    #
    performance_df_x = pd.DataFrame({"activationMethod": activationMethod_array, 
                                   "optimizerType": optimizerType_array, 
                                   "epochs": epochs_array, 
                                   "batchSizes": batchSize_array,
                                   "hiddenLayerNeurons": hiddenLayerNeurons_array, 
                                   "learningRate": learningRate_array, 
                                   "do_nesterov": do_nesterov_array, 
                                   "decayRate": decayRate_array, 
                                   "train_loss": train_loss_array, 
                                   "train_accuracy": train_accuracy_array, 
                                   "validation_loss": validation_loss_array, 
                                   "validation_accuracy": validation_accuracy_array})
    
    return performance_df_x

In [23]:
"""Sigmoid combinations"""
performance_df1 = findBestHyperparameters(xTrain, yTrain, xTest, yTest, 
                        activiationMethods=['sigmoid'],
                        optimizerTypes=['sgd', 'adagrad', 'adadelta', 'adam'],
                        epochsToTest=[5],
                        batchSizes=[32],
                        hiddenLayerSets=[[200, 100, 60, 30]],
                        learningRates=[0.01, 0.02], 
                        do_nesterov_or_not=[True],
                        decayRates=[0.1], verbose_in=1)


***Checking performance on:
ActivationMethod:  sigmoid , Optimizer:  sgd , Learning rate:  0.01
Epochs:  5 , Batch size:  32
Hidden layers:  [200, 100, 60, 30]
Nestrove?:  True , Decay rate:  0.1
Train on 7000 samples, validate on 3000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
{'loss': [0.5782074161938259, 0.5369598661490849, 0.5254261676243374, 0.5205006335122244, 0.5149096311160496], 'acc': [0.7054286, 0.7525714, 0.759, 0.7638571, 0.77342856], 'val_loss': [0.6620292293230693, 0.5695968276659648, 0.5132452943325043, 0.49944669326146446, 0.4962130310535431], 'val_acc': [0.79366666, 0.79933333, 0.8, 0.79033333, 0.78833336]}

***Checking performance on:
ActivationMethod:  sigmoid , Optimizer:  sgd , Learning rate:  0.02
Epochs:  5 , Batch size:  32
Hidden layers:  [200, 100, 60, 30]
Nestrove?:  True , Decay rate:  0.1
Train on 7000 samples, validate on 3000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
{'loss': [0.5383748188018799, 0.48831559910093036, 0.4749

In [24]:
performance_df2 = findBestHyperparameters(xTrain, yTrain, xTest, yTest, 
                        activiationMethods=['tanh'],
                        optimizerTypes=['sgd', 'adagrad', 'adadelta', 'adam'],
                        epochsToTest=[5],
                        batchSizes=[32],
                        hiddenLayerSets=[[200, 100, 60, 30]],
                        learningRates=[0.01, 0.02], 
                        do_nesterov_or_not=[True],
                        decayRates=[0.1], verbose_in=1)




***Checking performance on:
ActivationMethod:  tanh , Optimizer:  sgd , Learning rate:  0.01
Epochs:  5 , Batch size:  32
Hidden layers:  [200, 100, 60, 30]
Nestrove?:  True , Decay rate:  0.1
Train on 7000 samples, validate on 3000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
{'loss': [0.6138851735932487, 0.5600994415964399, 0.5484502863883972, 0.5366726416860308, 0.5342968590259553], 'acc': [0.67157143, 0.7278572, 0.73785716, 0.7474286, 0.7442857], 'val_loss': [0.5696544768015543, 0.5374750396410625, 0.5325309735139211, 0.5266701143582662, 0.5235576151212057], 'val_acc': [0.7353333, 0.7553333, 0.7453333, 0.74766666, 0.7493333]}

***Checking performance on:
ActivationMethod:  tanh , Optimizer:  sgd , Learning rate:  0.02
Epochs:  5 , Batch size:  32
Hidden layers:  [200, 100, 60, 30]
Nestrove?:  True , Decay rate:  0.1
Train on 7000 samples, validate on 3000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
{'loss': [0.5510659923894065, 0.49608763480186463, 0.484

In [25]:
performance_df3 = findBestHyperparameters(xTrain, yTrain, xTest, yTest, 
                        activiationMethods=['LeakyReLU'],
                        optimizerTypes=['sgd', 'adagrad', 'adadelta', 'adam'],
                        epochsToTest=[5],
                        batchSizes=[32],
                        hiddenLayerSets=[[200, 100, 60, 30]],
                        learningRates=[0.01, 0.02], 
                        do_nesterov_or_not=[True],
                        decayRates=[0.1], verbose_in=1)




***Checking performance on:
ActivationMethod:  LeakyReLU , Optimizer:  sgd , Learning rate:  0.01
Epochs:  5 , Batch size:  32
Hidden layers:  [200, 100, 60, 30]
Nestrove?:  True , Decay rate:  0.1
Train on 7000 samples, validate on 3000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
{'loss': [0.6327258095741272, 0.5700405972685133, 0.5551122195720672, 0.5469691556862423, 0.5381454468114035], 'acc': [0.6587143, 0.71, 0.7267143, 0.7387143, 0.74857146], 'val_loss': [1.3876995601654052, 0.875245470046997, 0.5598721521695454, 0.5194353130658468, 0.5127939364910126], 'val_acc': [0.207, 0.371, 0.73766667, 0.7776667, 0.781]}

***Checking performance on:
ActivationMethod:  LeakyReLU , Optimizer:  sgd , Learning rate:  0.02
Epochs:  5 , Batch size:  32
Hidden layers:  [200, 100, 60, 30]
Nestrove?:  True , Decay rate:  0.1
Train on 7000 samples, validate on 3000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
{'loss': [0.5550804107870374, 0.49653214836120607, 0.482408525739

In [26]:
performance_df4 = findBestHyperparameters(xTrain, yTrain, xTest, yTest, 
                        activiationMethods=['relu'],
                        optimizerTypes=['sgd', 'adagrad', 'adadelta', 'adam'],
                        epochsToTest=[5],
                        batchSizes=[32],
                        hiddenLayerSets=[[200, 100, 60, 30]],
                        learningRates=[0.01, 0.02], 
                        do_nesterov_or_not=[True],
                        decayRates=[0.1], verbose_in=1)


***Checking performance on:
ActivationMethod:  relu , Optimizer:  sgd , Learning rate:  0.01
Epochs:  5 , Batch size:  32
Hidden layers:  [200, 100, 60, 30]
Nestrove?:  True , Decay rate:  0.1
Train on 7000 samples, validate on 3000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
{'loss': [0.6657922226701464, 0.5874882426261901, 0.5662426836150033, 0.5603589200292315, 0.5570595631258828], 'acc': [0.63714284, 0.702, 0.719, 0.7221429, 0.7268571], 'val_loss': [0.6013418018023173, 0.5441504615147909, 0.5367436725298563, 0.5343230882485708, 0.5320558031400044], 'val_acc': [0.6803333, 0.739, 0.755, 0.75733334, 0.75666666]}

***Checking performance on:
ActivationMethod:  relu , Optimizer:  sgd , Learning rate:  0.02
Epochs:  5 , Batch size:  32
Hidden layers:  [200, 100, 60, 30]
Nestrove?:  True , Decay rate:  0.1
Train on 7000 samples, validate on 3000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
{'loss': [0.5854445488452912, 0.4962312593460083, 0.4771224364212581, 0.

In [28]:
performance_df_all = pd.concat([performance_df1, performance_df2, performance_df3, performance_df4], axis=0)

performance_df_all.sort_values(["validation_loss"], axis=0, inplace=True)
performance_df_all

Unnamed: 0,activationMethod,optimizerType,epochs,batchSizes,hiddenLayerNeurons,learningRate,do_nesterov,decayRate,train_loss,train_accuracy,validation_loss,validation_accuracy
7,relu,adam,5,32,"[200, 100, 60, 30]",0.02,False,0.0,0.37435,0.845857,0.346164,0.854
6,relu,adam,5,32,"[200, 100, 60, 30]",0.01,False,0.0,0.372677,0.838571,0.363171,0.857333
6,LeakyReLU,adam,5,32,"[200, 100, 60, 30]",0.01,False,0.0,0.375942,0.845143,0.378161,0.83
6,tanh,adam,5,32,"[200, 100, 60, 30]",0.01,False,0.0,0.406003,0.830714,0.406934,0.837667
6,sigmoid,adam,5,32,"[200, 100, 60, 30]",0.01,False,0.0,0.41954,0.826,0.421843,0.809667
7,sigmoid,adam,5,32,"[200, 100, 60, 30]",0.02,False,0.0,0.435669,0.814286,0.423617,0.821333
7,LeakyReLU,adam,5,32,"[200, 100, 60, 30]",0.02,False,0.0,0.378141,0.846714,0.436455,0.792667
1,LeakyReLU,sgd,5,32,"[200, 100, 60, 30]",0.02,True,0.1,0.470597,0.797714,0.446943,0.819333
1,relu,sgd,5,32,"[200, 100, 60, 30]",0.02,True,0.1,0.456887,0.799571,0.45288,0.802
1,sigmoid,sgd,5,32,"[200, 100, 60, 30]",0.02,True,0.1,0.468284,0.794857,0.453495,0.808


##### The best combination is:
- activation = relu
- optimizer = adam
- learningRate = 0.02

In [29]:
hist_preferred = checkThisConfiguration(xTrain, yTrain, xTest, yTest, 
                                        activiationMethod='relu', optimizerType='adam', epochs=50, 
                                        batchSize=32, hiddenLayers=[200, 100, 60, 30], learningRate=0.02, 
                                        do_nesterov=False, decayRate=0, verbose=1)


***Checking performance on:
ActivationMethod:  relu , Optimizer:  adam , Learning rate:  0.02
Epochs:  50 , Batch size:  32
Hidden layers:  [200, 100, 60, 30]
Nestrove?:  False , Decay rate:  0
Train on 7000 samples, validate on 3000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


# STEP 8
Predict the results using 0.5 as a threshold (5 points)

In [0]:
model_preferred = hist_preferred.model

In [31]:
y_predict_prob = model_preferred.predict(xTest)
y_predict_prob

array([[0.663569  , 0.33643103],
       [0.74241114, 0.25758886],
       [0.9043453 , 0.09565477],
       ...,
       [0.86731553, 0.13268451],
       [0.8380441 , 0.16195583],
       [0.25188065, 0.7481193 ]], dtype=float32)

In [0]:
y_pred = []
for entry in y_predict_prob:
    if entry[0] > 0.5:
        y_pred.append(0)
    else:
        y_pred.append(1)

y_test = []
for entry in yTest:
    if entry[0] == 1:
        y_test.append(0)
    else:
        y_test.append(1)



# STEP 9
Print the Accuracy score and confusion matrix (2.5 points)

In [0]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [34]:
"""ACCURACY SCORE"""
print("Accuracy: ", accuracy_score(y_test, y_pred))

"""CONFUSION MATRIX"""
con_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix: \n", con_mat)

"""RECALL"""
recall = con_mat[1,1]/(con_mat[1,1] + con_mat[1,0])
print("Recall to predict existing customers: ", recall)

Accuracy:  0.8546666666666667
Confusion matrix: 
 [[2211  168]
 [ 268  353]]
Recall to predict existing customers:  0.5684380032206119


### Inference
- From the recall value it can be seen that the prevision of the model to predict the existing customers is below 50%. Hence, it is not a good model.
- Seems there is class imbalance, causing the model to predict the non-exiting records better than the exiting records.

# Apply SMOTE to remove some class imbalance. Then model again.

In [35]:
# conda install -c conda-forge imbalanced-learn

from imblearn.over_sampling import SMOTE



In [0]:
y_train = []     # list
for entry in yTrain:
    if entry[0] == 1:
        y_train.append(0)
    else:
        y_train.append(1)


In [37]:
# Convert to dataframe
y_train_df = pd.DataFrame(y_train, columns=["exiting"])
y_train_df

# Get (7000, ) from (7000, 1)
y_train = y_train_df["exiting"]
y_train.shape

(7000,)

In [38]:
print("Before OverSampling, counts of label '1': {}".format(sum(y_train==1)))
print("Before OverSampling, counts of label '0': {} \n".format(sum(y_train==0)))

sm = SMOTE(random_state=2)
X_train_res, y_train_res = sm.fit_sample(xTrain, y_train.ravel())

print('After OverSampling, the shape of train_X: {}'.format(X_train_res.shape))
print('After OverSampling, the shape of train_y: {} \n'.format(y_train_res.shape))

print("After OverSampling, counts of label '1': {}".format(sum(y_train_res==1)))
print("After OverSampling, counts of label '0': {}".format(sum(y_train_res==0)))

Before OverSampling, counts of label '1': 1416
Before OverSampling, counts of label '0': 5584 

After OverSampling, the shape of train_X: (11168, 13)
After OverSampling, the shape of train_y: (11168,) 

After OverSampling, counts of label '1': 5584
After OverSampling, counts of label '0': 5584


In [40]:
# Convert the 
y_train_res_categorized = tf.keras.utils.to_categorical(y_train_res, num_classes=2)
# yTest_categorized = tf.keras.utils.to_categorical(yTest, num_classes=2)

# print(yTest_categorized.shape)
print(yTest.shape)
print(y_train_res_categorized.shape)

(3000, 2)
(11168, 2)


In [41]:
"""Create model on the balanced data"""
hist_preferred_smotTrain = checkThisConfiguration(X_train_res, y_train_res_categorized, xTest, yTest, 
                                        activiationMethod='relu', optimizerType='adam', epochs=50, 
                                        batchSize=32, hiddenLayers=[200, 100, 60, 30], learningRate=0.02, 
                                        do_nesterov=False, decayRate=0, verbose=1)


***Checking performance on:
ActivationMethod:  relu , Optimizer:  adam , Learning rate:  0.02
Epochs:  50 , Batch size:  32
Hidden layers:  [200, 100, 60, 30]
Nestrove?:  False , Decay rate:  0
Train on 11168 samples, validate on 3000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [42]:
model_smot = hist_preferred_smotTrain.model

y_predict_prob_smot = model_smot.predict(xTest)
# y_predict_prob

# Create list of y_pred and y_test containing only one value (instead of 2 values)
y_pred_smot = []
for entry in y_predict_prob_smot:
    if entry[0] > 0.5:
        y_pred_smot.append(0)
    else:
        y_pred_smot.append(1)

y_test = []
for entry in yTest:
    if entry[0] == 1:
        y_test.append(0)
    else:
        y_test.append(1)


"""ACCURACY SCORE"""
print("Accuracy: ", accuracy_score(y_test, y_pred_smot))

"""CONFUSION MATRIX"""
con_mat = confusion_matrix(y_test, y_pred_smot)
print("Confusion matrix: \n", con_mat)

"""RECALL"""
recall = con_mat[1,1]/(con_mat[1,1] + con_mat[1,0])
print("Recall to predict existing customers: ", recall)

Accuracy:  0.7983333333333333
Confusion matrix: 
 [[1961  418]
 [ 187  434]]
Recall to predict existing customers:  0.6988727858293076


In [None]:
# y_train_res_categorized = tf.keras.utils.to_categorical(y_train_res, num_classes=2)

# """Create model on the balanced data (learningRate=0.01)"""
# hist_preferred_smotTrain = checkThisConfiguration(X_train_res, y_train_res_categorized, xTest, yTest, 
#                                         activiationMethod='relu', optimizerType='adam', epochs=50, 
#                                         batchSize=32, hiddenLayers=[200, 100, 60, 30], learningRate=0.01, 
#                                         do_nesterov=False, decayRate=0, verbose=1)

In [None]:
# model_smot = hist_preferred_smotTrain.model

# y_predict_prob_smot = model_smot.predict(xTest)
# # y_predict_prob

# # Create list of y_pred and y_test containing only one value (instead of 2 values)
# y_pred_smot = []
# for entry in y_predict_prob_smot:
#     if entry[0] > 0.5:
#         y_pred_smot.append(0)
#     else:
#         y_pred_smot.append(1)

# y_test = []
# for entry in yTest:
#     if entry[0] == 1:
#         y_test.append(0)
#     else:
#         y_test.append(1)


# """ACCURACY SCORE"""
# print("Accuracy: ", accuracy_score(y_test, y_pred_smot))

# """CONFUSION MATRIX"""
# con_mat = confusion_matrix(y_test, y_pred_smot)
# print("Confusion matrix: \n", con_mat)

# """RECALL"""
# recall = con_mat[1,1]/(con_mat[1,1] + con_mat[1,0])
# print("Recall to predict existing customers: ", recall)

# INFERENCE
After using SMOT for class balance of the data (over target value)
- The overall accuracy of the model has decreased
- The recall is substantially increased (for identifying users who may exit)

As the target of the modeling was to identify users who may exit, therefore we will take the 'RECALL' as the preferred performance metric over overall accuracy. Hence, we will use the model with the training on SMOT data.