In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense,Flatten


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Load the Dataset

In [5]:
# Read the churn-modelling CSV from the current working directory.
dataset = pd.read_csv("Churn_Modelling.csv")

In [6]:
# Preview the first five rows (head() defaults to n=5).
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


#### Remove the columns that are not needed for modelling

In [7]:
# Split the raw frame into model inputs (x) and the prediction target (y).
# CustomerId, RowNumber and Surname are treated as not needed for modelling;
# Exited is removed from the inputs because it is the label being predicted.
x = dataset.drop(columns=["CustomerId", "RowNumber", "Exited", "Surname"])
# Fixed: the original read `datset["Exited"]`, a NameError on a fresh kernel.
y = dataset["Exited"]

In [8]:
# Preview the first five feature rows after dropping the unused columns.
x.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [9]:
# Display the target Series (pandas truncates the repr of a long Series).
y

0       1
1       0
2       1
3       0
4       0
       ..
9995    0
9996    0
9997    1
9998    1
9999    0
Name: Exited, Length: 10000, dtype: int64

#### Geography and Gender are categorical variables. Since they are not ordinal, we can use one-hot encoding to convert them to numeric features.

In [10]:
# One-hot encode the two categorical columns. drop_first=True omits the first
# level of each category so the remaining dummy columns are not redundant.
x = pd.get_dummies(
    x,
    columns=["Geography", "Gender"],
    drop_first=True,
)

In [11]:
# Preview the first five rows of the encoded feature frame.
x.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_Germany,Geography_Spain,Gender_Male
0,619,42,2,0.0,1,1,1,101348.88,0,0,0
1,608,41,1,83807.86,1,0,1,112542.58,0,1,0
2,502,42,8,159660.8,3,1,0,113931.57,0,0,0
3,699,39,1,0.0,2,0,0,93826.63,0,0,0
4,850,43,2,125510.82,1,1,1,79084.1,0,1,0


In [12]:
# Count how many rows fall into each value of the encoded gender column.
x["Gender_Male"].value_counts()

1    5457
0    4543
Name: Gender_Male, dtype: int64

### Train Test split

In [15]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. stratify=y keeps the class ratio of the
# target the same in both partitions; random_state pins the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

### Standardisation of values

In [16]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply that
# same transform to the test split so no test-set statistics leak into training.
scaler = StandardScaler()
x_train = scaler.fit(x_train).transform(x_train)
x_test = scaler.transform(x_test)

### Build the Model

In [37]:
# Fix TensorFlow's global seed before any layer is created so that weight
# initialisation is repeatable when the notebook is re-run from a fresh kernel.
tf.random.set_seed(42)

# Feed-forward binary classifier:
#   hidden layer as wide as the input -> 128-unit hidden layer -> 1 sigmoid unit.
n_features = x.shape[1]
model = Sequential()
model.add(Dense(n_features, activation='relu', input_dim=n_features))
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [38]:
# Print each layer's output shape and parameter count.
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 11)                132       
_________________________________________________________________
dense_8 (Dense)              (None, 128)               1536      
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 129       
Total params: 1,797
Trainable params: 1,797
Non-trainable params: 0
_________________________________________________________________


### Compile, train and evaluate the model

In [39]:
# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# tracked as the reporting metric.
model.compile(
    optimizer="Adam",
    loss=tf.keras.losses.binary_crossentropy,
    metrics=['accuracy'],
)

In [40]:
# x_train is a NumPy array after scaling, so the labels are passed as a NumPy
# array as well rather than as a pandas Series.
model.fit(
    x_train,
    y_train.to_numpy(),
    epochs=10,
    batch_size=10,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x290bea93070>

In [41]:
# Score the trained model on the held-out test split; returns [loss, accuracy].
model.evaluate(x_test,y_test)



[0.33974316716194153, 0.8665000200271606]

In [42]:
from sklearn.metrics import accuracy_score,confusion_matrix

# Sequential.predict_classes is deprecated and has been removed from newer
# TensorFlow/Keras releases. For a single sigmoid output, threshold the
# predicted probability at 0.5, which is the same rule predict_classes applied.
y_pred = (model.predict(x_test) > 0.5).astype("int32")
accuracy_score(y_test, y_pred)



0.8665

In [43]:
# Rows are the true classes, columns the predicted classes (scikit-learn convention).
confusion_matrix(y_test,y_pred)

array([[1548,   45],
       [ 222,  185]], dtype=int64)