In [158]:
import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# Reading data

In [54]:
dataset = pd.read_csv('Churn_dataset.csv')
dataset

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


# preprocessing

#### removing the unnecessary columns

In [64]:
x = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values
x

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

#### convert categorical data

In [65]:
# create dummy variable for the country column as they are not ordinal
ct = ColumnTransformer([("Country", OneHotEncoder(), [1])], remainder = 'passthrough')
x = ct.fit_transform(x)
# encode the male/female categorical column to numerical
encode_x2 = LabelEncoder()
x[:, 4] = encode_x2.fit_transform(x[:, 4])
x

array([[1.0, 0.0, 0.0, ..., 1, 1, 101348.88],
       [0.0, 0.0, 1.0, ..., 0, 1, 112542.58],
       [1.0, 0.0, 0.0, ..., 1, 0, 113931.57],
       ...,
       [1.0, 0.0, 0.0, ..., 0, 1, 42085.58],
       [0.0, 1.0, 0.0, ..., 1, 0, 92888.52],
       [1.0, 0.0, 0.0, ..., 1, 0, 38190.78]], dtype=object)

In [101]:
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.20, random_state=0)
xtest = np.asarray(xtest).astype(np.float32)
xtrain = np.asarray(xtrain).astype(np.float32)
xtrain

array([[0.0000000e+00, 0.0000000e+00, 1.0000000e+00, ..., 1.0000000e+00,
        0.0000000e+00, 1.6383064e+05],
       [0.0000000e+00, 1.0000000e+00, 0.0000000e+00, ..., 1.0000000e+00,
        1.0000000e+00, 5.7098000e+04],
       [1.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 1.0000000e+00,
        0.0000000e+00, 1.8563077e+05],
       ...,
       [1.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 1.0000000e+00,
        0.0000000e+00, 1.8142988e+05],
       [0.0000000e+00, 0.0000000e+00, 1.0000000e+00, ..., 1.0000000e+00,
        1.0000000e+00, 1.4875016e+05],
       [0.0000000e+00, 1.0000000e+00, 0.0000000e+00, ..., 1.0000000e+00,
        0.0000000e+00, 1.1885526e+05]], dtype=float32)

In [102]:
pd.DataFrame(xtrain)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.0,0.0,1.0,667.0,0.0,34.0,5.0,0.000000,2.0,1.0,0.0,163830.640625
1,0.0,1.0,0.0,427.0,1.0,42.0,1.0,75681.523438,1.0,1.0,1.0,57098.000000
2,1.0,0.0,0.0,535.0,0.0,29.0,2.0,112367.343750,1.0,1.0,0.0,185630.765625
3,0.0,0.0,1.0,654.0,1.0,40.0,5.0,105683.632812,1.0,1.0,0.0,173617.093750
4,0.0,0.0,1.0,850.0,0.0,57.0,8.0,126776.296875,2.0,1.0,1.0,132298.484375
...,...,...,...,...,...,...,...,...,...,...,...,...
7995,0.0,1.0,0.0,594.0,0.0,32.0,4.0,120074.968750,2.0,1.0,1.0,162961.796875
7996,0.0,0.0,1.0,794.0,0.0,22.0,4.0,114440.242188,1.0,1.0,1.0,107753.070312
7997,1.0,0.0,0.0,738.0,1.0,35.0,5.0,161274.046875,2.0,1.0,0.0,181429.875000
7998,0.0,0.0,1.0,590.0,0.0,38.0,9.0,0.000000,2.0,1.0,1.0,148750.156250


#### Building the ANN

In [149]:
import tensorflow as tf
import datetime, os
from tensorflow.python.keras.utils.vis_utils import plot_model
import pydot
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras import Input
from tensorflow.keras.callbacks import TensorBoard
from sklearn.model_selection import train_test_split

In [150]:
def build_model_with_functional(): 
    # instantiate the input Tensor
    input_layer = Input(shape=(len(pd.DataFrame(xtrain).columns),))
    # stack the layers
    first_dense = Dense(16, activation= tf.nn.relu)(input_layer)
    output_layer = Dense(1, activation= tf.nn.sigmoid)(first_dense)
    # declare inputs and outputs
    func_model = Model(inputs= input_layer, outputs= output_layer)
    
    return func_model

In [154]:
classifier = build_model_with_functional()
classifier.compile(optimizer= tf.optimizers.Adam(), loss='binary_crossentropy', metrics=['accuracy'])
classifier.fit(xtrain, ytrain, epochs=8)
ypred = model.predict(xtest)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [155]:
confusion_matrix(ytest, ypred)

array([[1595,    0],
       [ 405,    0]], dtype=int64)

In [159]:
accuracy_score(ytest, ypred)

0.7975