In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the churn dataset from the working directory and preview its first five rows.
data = pd.read_csv('./Churn_Modelling.csv')
print(data.head(5))

   RowNumber  CustomerId   Surname  CreditScore Geography  Gender  Age  \
0          1    15634602  Hargrave          619    France  Female   42   
1          2    15647311      Hill          608     Spain  Female   41   
2          3    15619304      Onio          502    France  Female   42   
3          4    15701354      Boni          699    France  Female   39   
4          5    15737888  Mitchell          850     Spain  Female   43   

   Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMember  \
0       2       0.00              1          1               1   
1       1   83807.86              1          0               1   
2       8  159660.80              3          1               0   
3       1       0.00              2          0               0   
4       2  125510.82              1          1               1   

   EstimatedSalary  Exited  
0        101348.88       1  
1        112542.58       0  
2        113931.57       1  
3         93826.63       0  
4         790

In [2]:
# Features: every column from CreditScore through EstimatedSalary (positions 3..12);
# the identifier columns RowNumber, CustomerId and Surname are left out.
X = data.loc[:, 'CreditScore':'EstimatedSalary']
# Target: the Exited flag (position 13).
y = data['Exited']

In [3]:
# One-hot encode the two categorical features. drop_first=True removes the first
# category of each so the remaining indicator columns are not redundant.
gen = pd.get_dummies(data=X['Gender'], drop_first=True)
geo = pd.get_dummies(data=X['Geography'], drop_first=True)

In [4]:
# Work on copies so the dummy frames built in the previous cell stay untouched.
Gender = gen.copy()
Geography = geo.copy()

# Replace the raw categorical columns with their indicator columns
# (Gender indicators first, then Geography, appended after the numeric features).
X = pd.concat([X.drop(columns=['Gender', 'Geography']), Gender, Geography], axis=1)

print(X.head(5))


   CreditScore  Age  Tenure    Balance  NumOfProducts  HasCrCard  \
0          619   42       2       0.00              1          1   
1          608   41       1   83807.86              1          0   
2          502   42       8  159660.80              3          1   
3          699   39       1       0.00              2          0   
4          850   43       2  125510.82              1          1   

   IsActiveMember  EstimatedSalary  Male  Germany  Spain  
0               1        101348.88     0        0      0  
1               1        112542.58     0        0      1  
2               0        113931.57     0        0      0  
3               0         93826.63     0        0      0  
4               1         79084.10     0        0      1  


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits so no test information leaks into the fit.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [6]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout


Using TensorFlow backend.


In [7]:
# Baseline network: two ReLU hidden layers feeding a single sigmoid output unit.
classifier = Sequential([
    # First hidden layer; input_dim declares the number of features per sample.
    Dense(units=10, kernel_initializer='he_uniform', activation='relu', input_dim=X_train.shape[1]),
    # Second hidden layer.
    Dense(units=6, kernel_initializer='he_uniform', activation='relu'),
    # Output layer: one sigmoid unit for the binary target.
    Dense(units=1, kernel_initializer='glorot_uniform', activation='sigmoid'),
])

classifier.compile(
    optimizer='Adamax',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train the baseline network, holding back 33% of the training rows for validation.
model_result = classifier.fit(
    X_train,
    y_train,
    validation_split=0.33,
    batch_size=10,
    epochs=100,
)

Train on 5359 samples, validate on 2641 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100

In [None]:
from sklearn.metrics import accuracy_score

# Predicted probabilities from the baseline network, thresholded at 0.5
# to obtain boolean class labels.
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)

# accuracy_score's signature is (y_true, y_pred). Accuracy happens to be
# symmetric, but keeping the documented order avoids silently wrong results
# if this call is later swapped for an asymmetric metric (precision, recall, ...).
score = accuracy_score(y_test, y_pred)
print("Score is : "+str(score))

In [None]:
# Performing Hyper Parameter Tuning

from keras.models import Sequential
from keras.layers import Dense, Activation, Embedding, Flatten, LeakyReLU, BatchNormalization, Dropout
from keras.activations import relu, sigmoid
from keras.wrappers.scikit_learn import KerasClassifier

def create_model(layers, activation):
    """Build and compile a feed-forward binary classifier.

    Parameters
    ----------
    layers : sequence of int
        Number of units for each hidden Dense layer, in order.
    activation : str
        Activation applied after every hidden Dense layer.

    Returns
    -------
    A compiled Sequential model: each hidden Dense layer is followed by the
    given activation and a Dropout(0.3); the output is a single sigmoid unit.
    """
    model = Sequential()
    for position, width in enumerate(layers):
        # Only the first layer declares the input width (read from the
        # module-level X_train); later layers get it from their predecessor.
        extra = {'input_dim': X_train.shape[1]} if position == 0 else {}
        model.add(Dense(width, **extra))
        model.add(Activation(activation))
        model.add(Dropout(0.3))
    model.add(Dense(units=1, kernel_initializer='glorot_uniform', activation='sigmoid'))
    model.compile(optimizer='Adamax', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [None]:
from sklearn.model_selection import GridSearchCV

# Wrap the Keras builder so scikit-learn can treat it as an estimator.
model = KerasClassifier(build_fn=create_model)

# Candidate values for each hyper-parameter.
batches = [128]
epochs = [30]
layers = [[20], [40,20], [45,30,15]]
activations = ['sigmoid', 'relu']

# 'layers' and 'activation' are the arguments of create_model;
# 'batch_size' and 'epochs' are handed to the wrapper.
param_grid = {
    'layers': layers,
    'activation': activations,
    'batch_size': batches,
    'epochs': epochs,
}
grid = GridSearchCV(estimator=model, param_grid=param_grid)

In [None]:
# NOTE(review): the original comment said this cell was commented out after being
# run, but the call below is live, so the grid search executes on every run.
grid_result = grid.fit(X_train, y_train)

In [None]:
# Pull the winning configuration out of the fitted grid search.
best_estimator = grid_result.best_estimator_
best_param = grid_result.best_params_
best_score = grid_result.best_score_

# print() stringifies each argument; sep="" keeps the label and value adjacent.
print("Best Score After Hyper Paramter Tuning : ", best_score, sep="")
print("Best Parametres are : ", best_param, sep="")

In [None]:
# Tuned network: two ReLU hidden layers (40 and 20 units), each followed by
# 30% dropout, feeding a single sigmoid output unit.
tuned_classifier = Sequential([
    Dense(units=40, kernel_initializer='he_uniform', activation='relu', input_dim=X_train.shape[1]),
    Dropout(0.3),
    Dense(units=20, kernel_initializer='he_uniform', activation='relu'),
    Dropout(0.3),
    Dense(units=1, kernel_initializer='glorot_uniform', activation='sigmoid'),
])

tuned_classifier.compile(
    optimizer='Adamax',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the tuned network, holding back 33% of the training rows for validation.
tuned_model_history = tuned_classifier.fit(
    X_train,
    y_train,
    validation_split=0.33,
    batch_size=128,
    epochs=40,
)

In [None]:
# Evaluate the tuned network on the held-out test set. The predictions must come
# from tuned_classifier: reusing the earlier y_pred (produced by the baseline
# classifier) would simply re-report the baseline accuracy as the "tuned" score.
tuned_pred = tuned_classifier.predict(X_test) > 0.5
tuned_score = accuracy_score(y_test, tuned_pred)
print("Score after Tuning the model is : "+str(tuned_score))

filename = 'churn-model-85'
import pickle  # no longer used in this cell; kept in case a later cell relies on it
# Persist the trained network itself. The previous code pickled `model`, which is
# the unfitted KerasClassifier wrapper used for the grid search, not the trained
# tuned_classifier, and it never closed the file handle. Keras' own save() writes
# the architecture, weights and optimizer state and manages the file itself.
tuned_classifier.save(filename)
print("Model saved succesfully!")