In [3]:
import pandas as pd
import numpy as np

# Read the churn CSV (path is relative to the notebook's working directory)
# and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='../datasets/Churn_Modelling.csv')
df.head(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [19]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# The last column is the label; the predictors are every column from
# position 3 up to (but not including) that last one.
y = df.iloc[:, -1].values
X = df.iloc[:, 3:-1].values

# encoding categorical data
# Positions 1 and 2 of X are one-hot encoded; the encoded columns come first
# in the result and all remaining columns are passed through unchanged
# after them.
x_transformer = ColumnTransformer(
    transformers=[
        ('geography_encoded', OneHotEncoder(), [1]),
        ('gender_encoded', OneHotEncoder(), [2]),
    ],
    remainder='passthrough',
)
X = x_transformer.fit_transform(X)

# dropping one of the dummy variables for each encoded variable (geography and gender)
# i.e. remove columns 0 and 3 and keep everything else.
# NOTE(review): index 3 is the first gender dummy only if geography expands
# to exactly three dummy columns -- confirm against the data.
X = np.delete(X, [0, 3], axis=1)
X

array([[0.0, 0.0, 0.0, ..., 1, 1, 101348.88],
       [0.0, 1.0, 0.0, ..., 0, 1, 112542.58],
       [0.0, 0.0, 0.0, ..., 1, 0, 113931.57],
       ...,
       [0.0, 0.0, 0.0, ..., 0, 1, 42085.58],
       [1.0, 0.0, 1.0, ..., 1, 0, 92888.52],
       [0.0, 0.0, 0.0, ..., 1, 0, 38190.78]], dtype=object)

In [20]:
# applying feature scaling
from sklearn.preprocessing import StandardScaler

# Learn per-column mean / standard deviation, then standardise X with them.
# NOTE(review): the scaler is fitted on every row, before the train/test
# split performed later in the notebook, so held-out rows contribute to the
# scaling statistics. Consider fitting on the training rows only.
x_scaler = StandardScaler().fit(X)
X = x_scaler.transform(X)
X

array([[-0.57873591, -0.57380915, -1.09598752, ...,  0.64609167,
         0.97024255,  0.02188649],
       [-0.57873591,  1.74273971, -1.09598752, ..., -1.54776799,
         0.97024255,  0.21653375],
       [-0.57873591, -0.57380915, -1.09598752, ...,  0.64609167,
        -1.03067011,  0.2406869 ],
       ...,
       [-0.57873591, -0.57380915, -1.09598752, ..., -1.54776799,
         0.97024255, -1.00864308],
       [ 1.72790383, -0.57380915,  0.91241915, ...,  0.64609167,
        -1.03067011, -0.12523071],
       [-0.57873591, -0.57380915, -1.09598752, ...,  0.64609167,
        -1.03067011, -1.07636976]])

In [21]:
# splitting data into training and test set
from sklearn.model_selection import train_test_split

# 80 % of the rows are used for training and 20 % are held out for testing;
# the fixed random_state makes the partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [24]:
import keras
from keras.models import Sequential
from keras.layers import Dense

In [75]:
# initialising the ANN
# Architecture: five fully connected hidden layers of 500 ReLU units each,
# followed by one sigmoid unit that outputs the probability of the positive
# class.
#
# The layers use the Keras 2 argument names (`units`, `kernel_initializer`).
# The Keras 1 spellings (`output_dim`, `init`) are deprecated: on Keras 2.x
# they emit an "Update your `Dense` call to the Keras 2 API" warning, and
# newer Keras releases do not accept them.
HIDDEN_UNITS = 500
N_HIDDEN_LAYERS = 5

classifier = Sequential()

# Adding the input layer and the first hidden layer. The input width is read
# from the training matrix (11 columns in this notebook) rather than being
# hard-coded, so it stays correct if the feature set changes.
classifier.add(
    Dense(
        units=HIDDEN_UNITS,
        input_dim=X_train.shape[1],
        kernel_initializer='uniform',
        activation='relu',
    )
)

# Adding the remaining hidden layers (second through fifth); they are
# identical, so they are built in a loop instead of being copy-pasted.
for _ in range(N_HIDDEN_LAYERS - 1):
    classifier.add(
        Dense(units=HIDDEN_UNITS, kernel_initializer='uniform', activation='relu')
    )

# Adding the output layer
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

# Compiling the ANN
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  """
  import sys
  if __name__ == '__main__':
  # This is added back by InteractiveShellApp.init_path()
  del sys.path[0]
  from ipykernel import kernelapp as app


In [76]:
# Train on the training split: 50 epochs, mini-batches of 100 rows.
# The returned History object is the cell's displayed value.
classifier.fit(
    x=X_train,
    y=y_train,
    batch_size=100,
    epochs=50,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.callbacks.History at 0x7f2a9af62d50>

In [77]:
from sklearn.metrics import confusion_matrix
import math

# Predicted probabilities for the held-out rows (one value per row, in
# column 0 of the prediction array).
y_pred = classifier.predict(X_test)

# Round each probability to the nearest class label (0 or 1) in one
# vectorised step, then tabulate true vs. predicted labels.
predicted_labels = np.rint(y_pred[:, 0]).astype(int)
cm = confusion_matrix(y_test, predicted_labels)

# Accuracy = correctly classified rows (the matrix diagonal) / all rows.
accuracy = (cm[0, 0] + cm[1, 1]) / len(y_pred)
print(f'accuracy: {accuracy}')
print(cm)

accuracy: 0.826
[[1425  170]
 [ 178  227]]
