In [0]:
import tensorflow as tf

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
ls -l

total 8
drwx------ 3 root root 4096 Apr  6 05:23 [0m[01;34mgdrive[0m/
drwxr-xr-x 1 root root 4096 Apr  4 20:20 [01;34msample_data[0m/


### 1. Read the dataset

In [0]:
import pandas as pd

In [0]:
data = pd.read_csv('/content/gdrive/My Drive/Colab Notebooks/bank.csv')

In [6]:
data.head().T

Unnamed: 0,0,1,2,3,4
RowNumber,1,2,3,4,5
CustomerId,15634602,15647311,15619304,15701354,15737888
Surname,Hargrave,Hill,Onio,Boni,Mitchell
CreditScore,619,608,502,699,850
Geography,France,Spain,France,France,Spain
Gender,Female,Female,Female,Female,Female
Age,42,41,42,39,43
Tenure,2,1,8,1,2
Balance,0,83807.9,159661,0,125511
NumOfProducts,1,1,3,2,1


### 2. Drop the columns which are unique for all users like IDs (2.5 points)

In [0]:
data_new = data.drop(['CustomerId','RowNumber','Surname'], axis =1)

In [8]:
data_new.head().T

Unnamed: 0,0,1,2,3,4
CreditScore,619,608,502,699,850
Geography,France,Spain,France,France,Spain
Gender,Female,Female,Female,Female,Female
Age,42,41,42,39,43
Tenure,2,1,8,1,2
Balance,0,83807.9,159661,0,125511
NumOfProducts,1,1,3,2,1
HasCrCard,1,0,1,0,1
IsActiveMember,1,1,0,0,1
EstimatedSalary,101349,112543,113932,93826.6,79084.1


### 3. Distinguish the feature and target set (2.5 points)

In [0]:
### Features
X = data_new.iloc[:,:-1]

In [10]:
X.head().T

Unnamed: 0,0,1,2,3,4
CreditScore,619,608,502,699,850
Geography,France,Spain,France,France,Spain
Gender,Female,Female,Female,Female,Female
Age,42,41,42,39,43
Tenure,2,1,8,1,2
Balance,0,83807.9,159661,0,125511
NumOfProducts,1,1,3,2,1
HasCrCard,1,0,1,0,1
IsActiveMember,1,1,0,0,1
EstimatedSalary,101349,112543,113932,93826.6,79084.1


In [0]:
y = data_new.iloc[:,-1]

In [12]:
y.head()

0    1
1    0
2    1
3    0
4    0
Name: Exited, dtype: int64

In [13]:
X.dtypes

CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
dtype: object

In [0]:
### Label Encoding to ensure character variables can be normalised

from sklearn.preprocessing import LabelEncoder, OneHotEncoder

labelencoder_X_1 = LabelEncoder()
X.iloc[:, 1] = labelencoder_X_1.fit_transform(X.iloc[:, 1])

labelencoder_X_2 = LabelEncoder()
X.iloc[:, 2] = labelencoder_X_2.fit_transform(X.iloc[:, 2])

In [15]:
X.head().T

Unnamed: 0,0,1,2,3,4
CreditScore,619.0,608.0,502.0,699.0,850.0
Geography,0.0,2.0,0.0,0.0,2.0
Gender,0.0,0.0,0.0,0.0,0.0
Age,42.0,41.0,42.0,39.0,43.0
Tenure,2.0,1.0,8.0,1.0,2.0
Balance,0.0,83807.86,159660.8,0.0,125510.82
NumOfProducts,1.0,1.0,3.0,2.0,1.0
HasCrCard,1.0,0.0,1.0,0.0,1.0
IsActiveMember,1.0,1.0,0.0,0.0,1.0
EstimatedSalary,101348.88,112542.58,113931.57,93826.63,79084.1


In [16]:
onehotencoder = OneHotEncoder(categorical_features = [1])
X = onehotencoder.fit_transform(X).toarray()

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [0]:
### After ONEHOT Encoding, lets drop the first column and consider the remaining two encoded variables
X = X[:, 1:]

### 4. Divide the data set into Train and test sets

In [0]:
from sklearn.model_selection import train_test_split

### Split the Dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=7)

### 5. Normalize the train and test data (2.5 points)

In [0]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [0]:
X_sc_train = scaler.fit_transform(X_train)

In [0]:
X_sc_test = scaler.transform(X_test)

In [22]:
X_sc_train.shape

(7000, 11)

### 6. Initialize and build the model (10 points)

In [0]:
#Initialize Sequential model
model = tf.keras.models.Sequential()

#model.add(input_shape=(11,))

In [24]:
#Add 1st hidden layer
model.add(tf.keras.layers.Dense(22, activation = 'relu', input_shape=(11,)))

Instructions for updating:
Colocations handled automatically by placer.


In [0]:
#Add 2nd hidden layer
model.add(tf.keras.layers.Dense(22, activation = 'relu'))

In [0]:
#Add 3rd hidden layer
model.add(tf.keras.layers.Dense(11, activation='sigmoid'))

In [0]:
#Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [28]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 22)                264       
_________________________________________________________________
dense_1 (Dense)              (None, 22)                506       
_________________________________________________________________
dense_2 (Dense)              (None, 11)                253       
Total params: 1,023
Trainable params: 1,023
Non-trainable params: 0
_________________________________________________________________


### 7. Optimize the model (5 points)

In [29]:
model.fit(X_sc_train,y_train,          
          validation_data=(X_sc_test,y_test),
          epochs=30,
          batch_size=50)

Train on 7000 samples, validate on 3000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f7553fa4ac8>

### 8. Predict the results using 0.5 as a threshold (5 points)

In [0]:
# Predicting the Test set results

y_pred = model.predict(X_sc_test)

In [0]:
y_pred = (y_pred > 0.5)
y_predict = y_pred.astype(int)

In [0]:
import numpy as np
y_predict = np.argmax(y_predict,axis=1)

### 9. Print the Accuracy score and confusion matrix (2.5 points)

In [33]:
y_predict

array([0, 0, 0, ..., 0, 0, 0])

In [0]:
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report 
results = confusion_matrix(y_test, y_predict)

In [35]:
print('Confusion Matrix:')
print(results)

Confusion Matrix:
[[2359   36]
 [ 396  209]]


In [0]:
accuracy = accuracy_score(y_test, y_predict)

In [37]:
print ('Accuracy Score :')
print(accuracy) 

Accuracy Score :
0.856
