# Step 1: installation and setup

In [132]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [133]:
print(tf.__version__)

2.11.0


# Step 2: Data Preproccesing

In [134]:
dataset = pd.read_csv('Churn_Modelling.csv')

In [135]:
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [136]:
# Selecting independent and dependent variables
x = dataset.iloc[:,3:-1]
y = dataset['Exited']
y

0       1
1       0
2       1
3       0
4       0
       ..
9995    0
9996    0
9997    1
9998    1
9999    0
Name: Exited, Length: 10000, dtype: int64

In [137]:
dataset.isnull().values.any()

False

In [138]:
dataset.select_dtypes(include='object').columns

Index(['Surname', 'Geography', 'Gender'], dtype='object')

In [139]:
from sklearn.preprocessing import LabelEncoder

In [140]:
label_1 = LabelEncoder()
x['Geography'] = label_1.fit_transform(x['Geography'])

In [141]:
x.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,0,Female,42,2,0.0,1,1,1,101348.88
1,608,2,Female,41,1,83807.86,1,0,1,112542.58
2,502,0,Female,42,8,159660.8,3,1,0,113931.57
3,699,0,Female,39,1,0.0,2,0,0,93826.63
4,850,2,Female,43,2,125510.82,1,1,1,79084.1


In [142]:
label_2 = LabelEncoder()
x['Gender'] = label_1.fit_transform(x['Gender'])

In [143]:
x.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,0,0,42,2,0.0,1,1,1,101348.88
1,608,2,0,41,1,83807.86,1,0,1,112542.58
2,502,0,0,42,8,159660.8,3,1,0,113931.57
3,699,0,0,39,1,0.0,2,0,0,93826.63
4,850,2,0,43,2,125510.82,1,1,1,79084.1


In [144]:
# Avoiding the Dummy variable trap
x = pd.get_dummies(x, drop_first=True, columns=['Geography'])

In [145]:
x.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_1,Geography_2
0,619,0,42,2,0.0,1,1,1,101348.88,0,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,1
2,502,0,42,8,159660.8,3,1,0,113931.57,0,0
3,699,0,39,1,0.0,2,0,0,93826.63,0,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,1


In [146]:
from sklearn.model_selection import train_test_split

In [147]:
x_train, x_test, y_train, y_test = train_test_split(
     x, y, test_size=0.2, random_state=0)

In [148]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# Building the Model

In [149]:
# Creating an object (Initializing the ANN)
model = tf.keras.models.Sequential()

In [150]:
# Adding input layer and first hidden layer
# 1) units = 6
# 2) activation function = ReLU
# 3) input dimention = 11
model.add(tf.keras.layers.Dense(units=6, activation='relu', input_dim=11))

In [151]:
# Adding second hidden layer
model.add(tf.keras.layers.Dense(units=6, activation='relu'))

In [152]:
# Output layer
model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

In [153]:
# Compiling the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [154]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 6)                 72        
                                                                 
 dense_13 (Dense)            (None, 6)                 42        
                                                                 
 dense_14 (Dense)            (None, 1)                 7         
                                                                 
Total params: 121
Trainable params: 121
Non-trainable params: 0
_________________________________________________________________


# Step 4: Training the model

In [155]:
model.fit(x_train, y_train.to_numpy(), batch_size=10, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fc8d5535850>

# Step 5: Model evaluation and prediction

In [156]:
test_loss, test_acc = model.evaluate(x_test, y_test.to_numpy())



In [157]:
y_pred = (model.predict(x_test) > 0.5).astype("int32")



In [158]:
print(y_pred)

[[0]
 [0]
 [0]
 ...
 [0]
 [0]
 [0]]


In [159]:
print(type(y_test.tolist()))

<class 'list'>


In [160]:
y_test = y_test.to_numpy()

In [161]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[1550   45]
 [ 278  127]]


In [162]:
truevalue = 0
falsevalue = 0
y_gav = y_test.tolist()
for i in range(len(y_pred)):
    if y_pred[i]==y_gav[i]:
        truevalue += 1
    else:
        falsevalue += 1
        print(i)

print(truevalue)
print(falsevalue)
print(truevalue/2000.0)

1
8
15
31
41
42
58
59
63
70
74
76
81
88
93
95
107
112
120
129
137
142
145
147
156
168
173
174
183
185
190
197
203
219
229
230
243
247
267
274
275
287
289
299
303
312
315
317
318
323
341
345
347
353
355
356
363
364
387
389
401
416
419
421
447
479
481
488
491
501
514
518
524
531
546
549
552
555
556
558
560
573
577
579
581
585
586
591
602
609
615
619
624
633
643
644
657
664
665
670
675
683
685
686
688
692
698
700
702
706
708
709
722
724
732
733
740
741
744
758
762
763
767
772
773
779
783
787
793
804
809
815
833
849
860
868
879
888
890
891
892
893
897
900
918
921
923
930
933
940
942
944
949
950
956
969
970
976
978
986
993
994
996
1004
1006
1009
1013
1016
1032
1060
1072
1077
1085
1095
1097
1099
1100
1105
1117
1122
1126
1140
1147
1148
1150
1151
1158
1159
1166
1168
1173
1174
1183
1187
1198
1210
1215
1218
1220
1222
1235
1237
1241
1260
1261
1268
1284
1285
1302
1316
1318
1319
1321
1322
1323
1329
1332
1334
1335
1337
1347
1348
1354
1360
1366
1367
1368
1370
1378
1381
1386
1418
1421
1425
1436
1438
1

In [164]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
acc_cm = accuracy_score(y_test, y_pred)
print(acc_cm)

[[1550   45]
 [ 278  127]]
0.8385
