In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

In [2]:
dataset = pd.read_csv('Churn_Modelling.csv')
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
X = dataset.iloc[:, 3:13].values
# we take all rows and all columns from index 3 to 12(features)
y = dataset.iloc[:, 13].values
# results
print(X.shape)
print(y.shape)

(10000, 10)
(10000,)


In [4]:
labelencoder_X_gender = LabelEncoder()
X[:, 2] = labelencoder_X_gender.fit_transform(X[:, 2])  
# changing gender to numbers
# we use label encoder because there are only 2 types of genders.
print(X)
print(X.shape)

[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]
(10000, 10)


In [5]:
# now convert country to 3 dummy variables in one hot format because we have 3 different types of countries
columnTransformer = ColumnTransformer([('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = np.array(columnTransformer.fit_transform(X), dtype=np.str)
X = X[:, 1:]  
# remove one dummy variable to avoid the dummy variable trap
print(X)
# credit score is shifted after geography
print(X.shape)

[['0.0' '0.0' '619' ... '1' '1' '101348.88']
 ['0.0' '1.0' '608' ... '0' '1' '112542.58']
 ['0.0' '0.0' '502' ... '1' '0' '113931.57']
 ...
 ['0.0' '0.0' '709' ... '0' '1' '42085.58']
 ['1.0' '0.0' '772' ... '1' '0' '92888.52']
 ['0.0' '0.0' '792' ... '1' '0' '38190.78']]
(10000, 11)


In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

# Model

In [7]:
def build_classifier():  # for kerasclassifier as it needs a function
    classifier = Sequential()
    classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11))
    classifier.add(Dropout(rate=0.1))
    classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dropout(rate=0.1))
    classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    
    return classifier

In [8]:
classifier = build_classifier()

classifier.fit(X_train, y_train, batch_size=10, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1db51558e88>

In [9]:
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)  
# convert probability to 0(False) or 1(True)

y_pred

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

## Confusion Matrix

- (0, 0) - True Negative
- (0, 1) - Falso Positive
- (1, 0) - False Negative
- (1, 1) - True Negative

In [10]:
# We will use a confusion matrix to predict the accuracy
cm = confusion_matrix(y_test, y_pred)
# (i=j) diagonal of confusion matrix represents correct predictions and the other diagonal shows incorrect predictions
print(cm)
accuracy = ((cm[0][0]+cm[1][1])/2000)
# 2000 is the number of rows in test dataset and sum of diagonal elements gives total correct predictions
print("accuracy: ", accuracy)

[[1595    0]
 [ 405    0]]
accuracy:  0.7975


In [11]:
# making new predictions
new_prediction = classifier.predict(sc.transform(np.array([[0.0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])))
# we need a horizontal vector here so we use [[]] which creates a 2d array
# to avoid warning we make first element float, also do not forget to scale the data
new_prediction = (new_prediction > 0.5)
new_prediction

array([[False]])

# Evaluating the model

Using K-fold cross validation - In this method dataset is divided into k parts and then k-1 parts are used for training and 1 part is used for testing. Also our model is trained k times with different parts of dataset as test set each time.

In [12]:
classifier = KerasClassifier(build_fn=build_classifier, batch_size=10, epochs=10)
accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10, n_jobs=1)
# we basically break X_train and y_train into 10 different parts, and then train and test on those 10 different models
# n_jobs is number of CPUs to use(-1 for all), X is data to fit, y are actual answers to compare with predictions
# cross_val_score will return k(10 here) accuracies as obtained from k fold cross validation

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E

In [13]:
print(accuracies.mean())  # mean
print(accuracies.std())  # variance

0.8196250021457672
0.01822643186291515


# Tuning the model

Using GridSearchCV - trying multiple models with different hyperparameters and also performs k-fold cross validation

In [14]:
def build_best_classifier(optimizer):  # important argument if optimizer is to be changed
    classifier = Sequential()
    classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11))
    classifier.add(Dropout(rate=0.1))
    classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dropout(rate=0.1))
    classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

In [15]:
classifier = KerasClassifier(build_fn=build_best_classifier)  
# NOTE that we do not need to provide arguments here

# hyperparameters
parameters = {'batch_size': [16, 32],  # checks two values 16, 32
              'epochs': [10, 20],  # checks two values 10 and 20
              'optimizer': ['adam', 'rmsprop']}  # tries with 2 optimizers

In [16]:
grid_search = GridSearchCV(estimator=classifier, param_grid=parameters, scoring='accuracy', cv=10)
# estimator is our model, param_grid contains parameters dictionary we try and cv aur number of k folds

grid_search = grid_search.fit(X_train, y_train)

Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12

In [17]:
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_
print("best_parameters: ", best_parameters)
print("best_accuracy: ", best_accuracy)

best_parameters:  {'batch_size': 16, 'epochs': 20, 'optimizer': 'rmsprop'}
best_accuracy:  0.8318749999999999
