In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv('Churn_Modelling.csv')

In [3]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
df.nunique()

RowNumber          10000
CustomerId         10000
Surname             2932
CreditScore          460
Geography              3
Gender                 2
Age                   70
Tenure                11
Balance             6382
NumOfProducts          4
HasCrCard              2
IsActiveMember         2
EstimatedSalary     9999
Exited                 2
dtype: int64

In [5]:
x = df.drop(['RowNumber','CustomerId' , 'Exited'],axis =1)
y = df['Exited']

In [6]:
x.head()

Unnamed: 0,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88
1,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63
4,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [7]:
x.nunique()

Surname            2932
CreditScore         460
Geography             3
Gender                2
Age                  70
Tenure               11
Balance            6382
NumOfProducts         4
HasCrCard             2
IsActiveMember        2
EstimatedSalary    9999
dtype: int64

In [10]:
geo = pd.get_dummies(x['Geography'], drop_first = True)
sex = pd.get_dummies(x['Gender'], drop_first = True)

In [11]:
x.drop(['Surname', 'Geography', 'Gender'], axis =1 , inplace =True)

In [12]:
x= pd.concat([x,geo,sex], axis = 1)

In [13]:
x.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Germany,Spain,Male
0,619,42,2,0.0,1,1,1,101348.88,0,0,0
1,608,41,1,83807.86,1,0,1,112542.58,0,1,0
2,502,42,8,159660.8,3,1,0,113931.57,0,0,0
3,699,39,1,0.0,2,0,0,93826.63,0,0,0
4,850,43,2,125510.82,1,1,1,79084.1,0,1,0


In [16]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
x = sc.fit_transform(x)

x_train, x_test , y_train, y_test = train_test_split(x,y, test_size = 0.2)

In [17]:
#importing neural network libraries

import keras 
from keras.models import Sequential                        # model creation
from keras.layers import Dense                             # Creating neural layers/ hidden layer
from keras.layers import LeakyReLU, ELU, PReLU
from keras.layers import Dropout

In [54]:
model = Sequential()                        # creating Model 

In [55]:
#adding first hidden layer of the neural network with 6 nodes and 11 input nodes.
model.add(Dense(units = 6, kernel_initializer = 'he_uniform', activation = 'relu', input_dim = 11))

In [56]:
#adding second hidden layer of the neural network with 6 nodes (no input nodes are required to be mentioned).
model.add(Dense(units = 9, kernel_initializer = 'he_uniform', activation = 'relu'))

#adding dropuot to the second hidden layer to reduce overfitting (30% of the nodes will not send the signal)
model.add(Dropout(0.3))

In [57]:
#adding third hidden layer of the neural network with 6 nodes (no input nodes are required to be mentioned).
model.add(Dense(units = 6, kernel_initializer = 'he_uniform', activation = 'relu'))

In [58]:
#adding output layer of the neural network with 1 node for binary classification.
model.add(Dense(units = 1, kernel_initializer = 'glorot_uniform', activation = 'sigmoid'))

In [59]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 6)                 72        
_________________________________________________________________
dense_16 (Dense)             (None, 9)                 63        
_________________________________________________________________
dropout (Dropout)            (None, 9)                 0         
_________________________________________________________________
dense_17 (Dense)             (None, 6)                 60        
_________________________________________________________________
dense_18 (Dense)             (None, 1)                 7         
Total params: 202
Trainable params: 202
Non-trainable params: 0
_________________________________________________________________


In [60]:
#Compiling the model  'binary_crossentropy' for binary classification and  'catagorical_crossentropy' for multivariable classification
model.compile(optimizer = 'Adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [61]:
model_his = model.fit(x_train, y_train, validation_split = 0.33, batch_size = 10, epochs = 15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [39]:
print(model_his.history.keys())

dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])


In [62]:
model.evaluate(x_test,y_test)



[0.39034923911094666, 0.8370000123977661]

In [64]:
y_pred  = model.predict(x_test)
y_pred = (y_pred >0.5)

In [66]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
print(accuracy_score(y_pred, y_test))
print()

print(confusion_matrix(y_pred, y_test))
print()
print(classification_report(y_pred, y_test))


0.837

[[1528  264]
 [  62  146]]

              precision    recall  f1-score   support

       False       0.96      0.85      0.90      1792
        True       0.36      0.70      0.47       208

    accuracy                           0.84      2000
   macro avg       0.66      0.78      0.69      2000
weighted avg       0.90      0.84      0.86      2000

