In [0]:
We welcome you all to the case-based project of this course. This project has 2 case studies.
The first case study (described below - 30 points) covers concepts taught in Part 1 (the first 8 hours of Neural networks basics).
1st case study - Project 1:
The case study is from an open-source dataset from Kaggle.
Link to the Kaggle project site:
https://www.kaggle.com/barelydedicated/bank-customer-churn-modeling
Given a Bank customer, can we build a classifier that can determine whether they will leave or not using Neural networks?
Case file:
bank.csv
The points distribution for this case is as follows:
    1. Read the dataset
    2. Drop the columns which are unique for all users like IDs (2.5 points)
    3. Distinguish the feature and target set (2.5 points)
    4. Divide the data set into train and test sets
    5. Normalize the train and test data (2.5 points)
    6. Initialize & build the model (10 points)
    7. Optimize the model (5 points)
    9. Predict the results using 0.5 as a threshold (5 points)
    10. Print the Accuracy score and confusion matrix (2.5 points)

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import io
from datetime import datetime
import time

In [0]:
df_raw = pd.read_csv('/content/drive/My Drive/Churn.csv')

In [4]:
df_raw.shape

(10000, 14)

In [5]:
df_raw.head(3)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1


In [0]:
df = df_raw.copy(deep=True)

In [0]:
df.drop(columns=(['RowNumber','CustomerId','Surname']),inplace= True)

In [8]:
df.head(2)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0


In [9]:
#deal with categorical data --> encode them

from sklearn.preprocessing import LabelEncoder
labelencoder_x = LabelEncoder()
df.iloc[:, 1] = labelencoder_x.fit_transform(df.iloc[:, 1]) #applying on Geography
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,0,Female,42,2,0.0,1,1,1,101348.88,1
1,608,2,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,0,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,0,Female,39,1,0.0,2,0,0,93826.63,0
4,850,2,Female,43,2,125510.82,1,1,1,79084.1,0


In [10]:
#apply encoder on Gender as well
labelencoder_x_2 = LabelEncoder()
df.iloc[:, 2] = labelencoder_x_2.fit_transform(df.iloc[:, 2]) #applying on Gender
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,0,0,42,2,0.0,1,1,1,101348.88,1
1,608,2,0,41,1,83807.86,1,0,1,112542.58,0
2,502,0,0,42,8,159660.8,3,1,0,113931.57,1
3,699,0,0,39,1,0.0,2,0,0,93826.63,0
4,850,2,0,43,2,125510.82,1,1,1,79084.1,0


In [11]:
#One hot encoding. 

from keras.utils import to_categorical
encoded = pd.DataFrame(to_categorical(df.iloc[:, 1]))
#no need to encode Gender, as there are only two categories

df = pd.concat([encoded, df], axis = 1)
df.head()

Using TensorFlow backend.


Unnamed: 0,0,1,2,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1.0,0.0,0.0,619,0,0,42,2,0.0,1,1,1,101348.88,1
1,0.0,0.0,1.0,608,2,0,41,1,83807.86,1,0,1,112542.58,0
2,1.0,0.0,0.0,502,0,0,42,8,159660.8,3,1,0,113931.57,1
3,1.0,0.0,0.0,699,0,0,39,1,0.0,2,0,0,93826.63,0
4,0.0,0.0,1.0,850,2,0,43,2,125510.82,1,1,1,79084.1,0


In [12]:
#Dropping the existing "geography" category, and one of the onehotcoded columns.

df = df.drop(['Geography', 0], axis = 1)
df.head()

Unnamed: 0,1,2,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,0.0,0.0,619,0,42,2,0.0,1,1,1,101348.88,1
1,0.0,1.0,608,0,41,1,83807.86,1,0,1,112542.58,0
2,0.0,0.0,502,0,42,8,159660.8,3,1,0,113931.57,1
3,0.0,0.0,699,0,39,1,0.0,2,0,0,93826.63,0
4,0.0,1.0,850,0,43,2,125510.82,1,1,1,79084.1,0


In [0]:
x= np.array(df.drop(columns=(['Exited'])))
y = np.array(df['Exited'])

In [0]:
#train and test set split, and feature scaling

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [0]:
import keras
from keras.models import Sequential
from keras.layers import Dense #to add layers

In [16]:
#there is no rule on how many nodes each hidden layer should have
classifier = Sequential()
classifier.add(Dense(output_dim = 6, init = 'uniform', activation = 'relu', input_dim = 11)) ## relu = Rectified Linear Unit , Scaled Exponential Linear Unit (SELU).
#init --> initialize weights according to uniform distribution
#input_dim is required for the first hidden layer, as it is the first starting point. --> number of nodes.
#output_dim --> number of nodes of the hidden layer
classifier.add(Dense(output_dim = 6, init = 'uniform', activation = 'relu'))
#input_dim --> remove it as it already knows what to expect.

#the output layer
classifier.add(Dense(output_dim = 1, init = 'uniform', activation = 'sigmoid'))
#output_dim should be 1, as output is binary outcome, and activation should be 'sigmoid'
#If dependent variables have more than two categories, use activation = 'softmax'

#compile the model --> backpropagation -> gradient descent
classifier.compile(optimizer = 'SGD', loss = "binary_crossentropy", metrics = ['accuracy'])
#optimizer = algorithm to find the optimal set of weights in ANN
#loss = functions that should be optimized. if more than two categories, use "categorical_crossentropy"
#metrics = criterion used to calculate the performance of the model.






Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


  
  
  # Remove the CWD from sys.path while we load stuff.


In [17]:
classifier.fit(X_train, Y_train, batch_size = 10, nb_epoch = 20)
#batch_size = the number of observations after which you want to update the weights
#           batch size and epochs should be tuned through experiments.
#epoch = going through the whole dataset

  """Entry point for launching an IPython kernel.





Epoch 1/20





Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f7decf5c2b0>

In [18]:
#predicting the results

y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5) #to classify each probability into True or False

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test, y_pred)
print (cm, '\n\n', y_pred[:5, :])

[[1595    0]
 [ 405    0]] 

 [[False]
 [False]
 [False]
 [False]
 [False]]


In [19]:
#accuracy
print ((cm[0][0] + cm[1][1])/(cm[0][0] +cm[0][1] + cm[1][0]+cm[1][1]))

0.7975
