# Step 1: Installation and Setup

In [None]:
import tensorflow as tf

In [None]:
# Show which TensorFlow version this notebook is running against
print(tf.__version__)

2.2.0


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Step 2: Data Preprocessing

In [None]:
# Load the churn data from the CSV file (path is relative to the working directory)
dataset = pd.read_csv(filepath_or_buffer='Churn_Modelling.csv')

In [None]:
# Preview the first rows of the loaded data
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [None]:
# Independent variables: everything except the identifier columns and the target
non_feature_columns = ['RowNumber', 'CustomerId', 'Surname', 'Exited']
x = dataset.drop(columns=non_feature_columns)

# Dependent variable: the 'Exited' column
y = dataset['Exited']

In [None]:
# Inspect the first 10 rows of the feature table
x.head(10)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1
5,645,Spain,Male,44,8,113755.78,2,1,0,149756.71
6,822,France,Male,50,7,0.0,2,1,1,10062.8
7,376,Germany,Female,29,4,115046.74,4,1,0,119346.88
8,501,France,Male,44,4,142051.07,2,0,1,74940.5
9,684,France,Male,27,2,134603.88,1,1,1,71725.73


In [None]:
# Inspect the first rows of the target column
y.head()

0    1
1    0
2    1
3    0
4    0
Name: Exited, dtype: int64

In [None]:
# Encoding the categorical data
from sklearn.preprocessing import LabelEncoder

In [None]:
# Replace the Geography strings with integer codes
# (in the output below: France -> 0, Germany -> 1, Spain -> 2)
label_1 = LabelEncoder()
geography_codes = label_1.fit_transform(x['Geography'])
x['Geography'] = geography_codes

In [None]:
# Check the result: Geography now holds integer codes
x.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,0,Female,42,2,0.0,1,1,1,101348.88
1,608,2,Female,41,1,83807.86,1,0,1,112542.58
2,502,0,Female,42,8,159660.8,3,1,0,113931.57
3,699,0,Female,39,1,0.0,2,0,0,93826.63
4,850,2,Female,43,2,125510.82,1,1,1,79084.1


In [None]:
# Replace the Gender strings with integer codes
# (in the output below: Female -> 0, Male -> 1)
label_2 = LabelEncoder()
gender_codes = label_2.fit_transform(x['Gender'])
x['Gender'] = gender_codes

In [None]:
# Check the result: Gender now holds integer codes
x.head(8)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,0,0,42,2,0.0,1,1,1,101348.88
1,608,2,0,41,1,83807.86,1,0,1,112542.58
2,502,0,0,42,8,159660.8,3,1,0,113931.57
3,699,0,0,39,1,0.0,2,0,0,93826.63
4,850,2,0,43,2,125510.82,1,1,1,79084.1
5,645,2,1,44,8,113755.78,2,1,0,149756.71
6,822,0,1,50,7,0.0,2,1,1,10062.8
7,376,1,0,29,4,115046.74,4,1,0,119346.88


In [None]:
# One-hot encode Geography. drop_first=True omits the first category's column,
# which avoids the dummy variable trap (perfectly collinear indicator columns).
x = pd.get_dummies(data=x, columns=['Geography'], drop_first=True)

In [None]:
# Check the result: Geography is replaced by the Geography_1 / Geography_2 indicator columns
x.head(8)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_1,Geography_2
0,619,0,42,2,0.0,1,1,1,101348.88,0,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,1
2,502,0,42,8,159660.8,3,1,0,113931.57,0,0
3,699,0,39,1,0.0,2,0,0,93826.63,0,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,1
5,645,1,44,8,113755.78,2,1,0,149756.71,0,1
6,822,1,50,7,0.0,2,1,1,10062.8,0,0
7,376,0,29,4,115046.74,4,1,0,119346.88,1,0


In [None]:
# Hold out 20% of the rows as a test set; random_state pins the shuffle
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Standardise the features. The scaler is fitted on the training data only and the
# same fitted transformation is then applied to both the training and the test set.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [None]:
# Display the scaled training features (now a NumPy array; large arrays are abbreviated with ...)
x_train

array([[ 0.16958176, -1.09168714, -0.46460796, ...,  1.10643166,
        -0.5698444 ,  1.74309049],
       [-2.30455945,  0.91601335,  0.30102557, ..., -0.74866447,
         1.75486502, -0.57369368],
       [-1.19119591, -1.09168714, -0.94312892, ...,  1.48533467,
        -0.5698444 , -0.57369368],
       ...,
       [ 0.9015152 ,  0.91601335, -0.36890377, ...,  1.41231994,
        -0.5698444 , -0.57369368],
       [-0.62420521, -1.09168714, -0.08179119, ...,  0.84432121,
        -0.5698444 ,  1.74309049],
       [-0.28401079, -1.09168714,  0.87525072, ...,  0.32472465,
         1.75486502, -0.57369368]])

In [None]:
# Display the scaled test features (now a NumPy array; large arrays are abbreviated with ...)
x_test

array([[-0.55204276, -1.09168714, -0.36890377, ...,  1.61085707,
         1.75486502, -0.57369368],
       [-1.31490297, -1.09168714,  0.10961719, ...,  0.49587037,
        -0.5698444 , -0.57369368],
       [ 0.57162971, -1.09168714,  0.30102557, ..., -0.42478674,
        -0.5698444 ,  1.74309049],
       ...,
       [-0.74791227,  0.91601335, -0.27319958, ...,  0.71888467,
        -0.5698444 ,  1.74309049],
       [-0.00566991,  0.91601335, -0.46460796, ..., -1.54507805,
         1.75486502, -0.57369368],
       [-0.79945688,  0.91601335, -0.84742473, ...,  1.61255917,
         1.75486502, -0.57369368]])

# Step 3: Building the Model

In [None]:
# Fix TensorFlow's global random seed before any layers are created so that
# re-running the notebook gives more repeatable results (some ops, e.g. on GPU,
# may still be nondeterministic).
tf.random.set_seed(0)

# Creating an object (initializing the ANN)
model = tf.keras.models.Sequential()

In [None]:
# Adding the input layer and first hidden layer
# 1) units = 6
# 2) activation function = ReLU
# 3) input dimension = number of feature columns in x_train (11 for this dataset);
#    derived from the data so the model stays valid if the preprocessing changes
model.add(tf.keras.layers.Dense(units=6, activation='relu', input_dim=x_train.shape[1]))

In [None]:
# Second hidden layer: 6 units with ReLU activation
second_hidden_layer = tf.keras.layers.Dense(units=6, activation='relu')
model.add(second_hidden_layer)

In [None]:
# Output layer: a single sigmoid unit
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
model.add(output_layer)

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Print each layer's output shape and parameter count
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 6)                 72        
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 42        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 7         
Total params: 121
Trainable params: 121
Non-trainable params: 0
_________________________________________________________________


# Step 4: Training the model

In [None]:
# Train for 20 epochs in mini-batches of 10 samples; the labels are passed as a NumPy array
model.fit(
    x=x_train,
    y=y_train.to_numpy(),
    batch_size=10,
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f2c9da71b38>

# Step 5: Model evaluation and prediction

In [None]:
# Evaluate the model on the held-out test set.
# np.asarray accepts y_test both as the original pandas Series and as the NumPy
# array it becomes after the later conversion cell, so this cell can be re-run
# at any point without raising AttributeError.
test_loss, test_acc = model.evaluate(x_test, np.asarray(y_test))



In [None]:
# Report the accuracy measured on the test set
accuracy_message = 'Test Accuracy: {}'.format(test_acc)
print(accuracy_message)

Test Accuracy: 0.8460000157356262


In [None]:
# Sequential.predict_classes is deprecated (see the warning it emits) and is not
# available in later TensorFlow releases. The model ends in a single sigmoid unit,
# so the class is obtained by thresholding the predicted probability at 0.5.
y_pred = (model.predict(x_test) > 0.5).astype("int32")

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [None]:
# Show the predicted class labels (one row per test sample)
print(y_pred)

[[0]
 [0]
 [0]
 ...
 [0]
 [0]
 [0]]


In [None]:
# Show the true labels of the test set for comparison
print(y_test)

9394    0
898     1
2398    0
5906    0
2343    0
       ..
1037    0
2899    0
9549    0
2740    0
6690    0
Name: Exited, Length: 2000, dtype: int64


In [None]:
# Convert the test labels to a NumPy array so they can be indexed by position.
# np.asarray returns an existing array unchanged, so re-running this cell is safe
# (calling .to_numpy() on an ndarray would raise AttributeError).
y_test = np.asarray(y_test)

In [None]:
# Compare the predicted and the true label for one test sample.
# Two separate statements: joining the prints with a comma builds a tuple of their
# return values, which the notebook then echoes as a spurious `(None, None)`.
print(y_pred[11])
print(y_test[11])

[0]
0


(None, None)

In [None]:
# Confusion matrix of the true test labels against the predicted labels
from sklearn.metrics import confusion_matrix, accuracy_score

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[1523   72]
 [ 236  169]]


In [None]:
# Accuracy computed by scikit-learn from the same true and predicted labels
acc_cm = accuracy_score(y_true=y_test, y_pred=y_pred)
print(acc_cm)

0.846
