# Importing the Libraries

In [49]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras

# Importing the Dataset

In [50]:
dset = pd.read_csv("Churn_Modelling.csv")

X = dset.iloc[:,3:-1]
y = dset.iloc[:,-1]

In [51]:
# show X
X

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.00,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.80,3,1,0,113931.57
3,699,France,Female,39,1,0.00,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10
...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77
9997,709,France,Female,36,7,0.00,1,0,1,42085.58
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52


In [52]:
# show y
y

0       1
1       0
2       1
3       0
4       0
       ..
9995    0
9996    0
9997    1
9998    1
9999    0
Name: Exited, Length: 10000, dtype: int64

# Data Preprocessing 

#### Geography and Gender have columns need to be encoded:

In [53]:
# create dummies variables:
geography = pd.get_dummies(X['Geography'], drop_first=True) 
gender = pd.get_dummies(X['Gender'], drop_first=True)

# print(geography) # is a DataFrame
# print(gender) # is a DataFrame

In [54]:
#  Concat the geography and gender Dataframes to X Dataframe:
X = pd.concat([X,gender,geography], axis=1)
X

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Male,Germany,Spain
0,619,France,Female,42,2,0.00,1,1,1,101348.88,0,0,0
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,0,1
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,0,0,0
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0,0,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,1,0,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,1,0,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,0,0,0
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1,1,0


In [55]:
# drop unnecessary columns:
X = X.drop(['Geography','Gender'], axis=1)
X

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Male,Germany,Spain
0,619,42,2,0.00,1,1,1,101348.88,0,0,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,1
2,502,42,8,159660.80,3,1,0,113931.57,0,0,0
3,699,39,1,0.00,2,0,0,93826.63,0,0,0
4,850,43,2,125510.82,1,1,1,79084.10,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,39,5,0.00,2,1,0,96270.64,1,0,0
9996,516,35,10,57369.61,1,1,1,101699.77,1,0,0
9997,709,36,7,0.00,1,0,1,42085.58,0,0,0
9998,772,42,3,75075.31,2,1,0,92888.52,1,1,0


# Splitting the dset into training and testing dset:

In [56]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [57]:
X_train

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Male,Germany,Spain
7389,667,34,5,0.00,2,1,0,163830.64,0,0,1
9275,427,42,1,75681.52,1,1,1,57098.00,1,1,0
2995,535,29,2,112367.34,1,1,0,185630.76,0,0,0
5316,654,40,5,105683.63,1,1,0,173617.09,1,0,1
356,850,57,8,126776.30,2,1,1,132298.49,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
9225,594,32,4,120074.97,2,1,1,162961.79,0,1,0
4859,794,22,4,114440.24,1,1,1,107753.07,0,0,1
3264,738,35,5,161274.05,2,1,0,181429.87,1,0,0
9845,590,38,9,0.00,2,1,1,148750.16,0,0,1


In [58]:
X_test

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Male,Germany,Spain
9394,597,35,8,131101.04,1,1,1,192852.67,0,1,0
898,523,40,2,102967.41,1,1,0,128702.10,0,0,0
2398,706,42,8,95386.82,1,1,1,75732.25,0,0,1
5906,788,32,4,112079.58,1,0,0,89368.59,1,0,0
2343,706,38,5,163034.82,2,1,1,135662.17,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...
1037,625,24,1,0.00,2,1,1,180969.55,0,0,0
2899,586,35,7,0.00,2,1,0,70760.69,0,0,0
9549,578,36,1,157267.95,2,1,0,141533.19,1,0,1
2740,650,34,4,142393.11,1,1,1,11276.48,1,1,0


In [59]:
y_train

7389    0
9275    0
2995    0
5316    0
356     0
       ..
9225    0
4859    0
3264    0
9845    0
2732    1
Name: Exited, Length: 8000, dtype: int64

In [60]:
y_test

9394    0
898     1
2398    0
5906    0
2343    0
       ..
1037    0
2899    0
9549    0
2740    0
6690    0
Name: Exited, Length: 2000, dtype: int64

# Feature Scaling:

In [61]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [62]:
X_train

array([[ 0.16958176, -0.46460796,  0.00666099, ..., -1.09168714,
        -0.5698444 ,  1.74309049],
       [-2.30455945,  0.30102557, -1.37744033, ...,  0.91601335,
         1.75486502, -0.57369368],
       [-1.19119591, -0.94312892, -1.031415  , ..., -1.09168714,
        -0.5698444 , -0.57369368],
       ...,
       [ 0.9015152 , -0.36890377,  0.00666099, ...,  0.91601335,
        -0.5698444 , -0.57369368],
       [-0.62420521, -0.08179119,  1.39076231, ..., -1.09168714,
        -0.5698444 ,  1.74309049],
       [-0.28401079,  0.87525072, -1.37744033, ..., -1.09168714,
         1.75486502, -0.57369368]])

In [63]:
X_test

array([[-0.55204276, -0.36890377,  1.04473698, ..., -1.09168714,
         1.75486502, -0.57369368],
       [-1.31490297,  0.10961719, -1.031415  , ..., -1.09168714,
        -0.5698444 , -0.57369368],
       [ 0.57162971,  0.30102557,  1.04473698, ..., -1.09168714,
        -0.5698444 ,  1.74309049],
       ...,
       [-0.74791227, -0.27319958, -1.37744033, ...,  0.91601335,
        -0.5698444 ,  1.74309049],
       [-0.00566991, -0.46460796, -0.33936434, ...,  0.91601335,
         1.75486502, -0.57369368],
       [-0.79945688, -0.84742473,  1.04473698, ...,  0.91601335,
         1.75486502, -0.57369368]])

# Create ANN along with Hyper-parameter tuning:

###### import libraries: 

In [64]:
from sklearn.model_selection import GridSearchCV

from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Embedding, LeakyReLU, BatchNormalization, Dropout
from keras.activations import relu, sigmoid

#### define create_ann fxn; it will be used for creating diff ANN and compiling them.

In [65]:
def create_ann(layers, activation):
  ann = Sequential()

  for i,nodes in enumerate(layers):
    if i == 0: # adds Input layer and 1st Hidden Layer 
      ann.add(Dense(nodes, input_dim=X_train.shape[1]))
      ann.add(Activation(activation))
      ann.add(Dropout(0.3))
    else: # adds further Hidden Layers as specified
      ann.add(Dense(nodes))
      ann.add(Activation(activation))
      ann.add(Dropout(0.3))
    
  # add Output Layer:
  ann.add(Dense(
        units=1, # output neuron
        kernel_initializer='glorot_uniform',
        activation='sigmoid' 
        ))
    
  # Compiling ANN:
  ann.compile(
      optimizer='adam',
      loss='binary_crossentropy',
      metrics=['accuracy']
  )

  # return ann:
  return ann

##### calling create_ann() with diif combinations of arguments:

In [66]:
ann = KerasClassifier(build_fn=create_ann, verbose=0)

# params:
layers = [ (20,), (40,20), (45,30,15) ]
activation = ['sigmoid', 'relu']
batch_sizes = [128, 256]
no_epochs = [30, 50, 100]

params_dict = dict(layers=layers, 
                   activation=activation,
                   batch_size=batch_sizes,
                   epochs=no_epochs
                   )

grid = GridSearchCV(estimator=ann,
                    param_grid=params_dict,
                    cv=5
                    )


grid_result = grid.fit(X_train, y_train)

In [67]:
# GridSearchCV 's best Results:

print("Best Score: ",grid_result.best_score_)
print("Best Params: ",grid_result.best_params_)


Best Score:  0.8607499837875366
Best Params:  {'activation': 'relu', 'batch_size': 256, 'epochs': 100, 'layers': (45, 30, 15)}


# Making Predictions from Grid's Results:

In [68]:
y_pred = grid.predict(X_test)
y_pred = (y_pred > 0.5)



In [69]:
# confusion matrix and accuracy:

from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_pred, y_test)
print(cm)

accu_score = accuracy_score(y_pred, y_test)
print(accu_score)

[[1523  207]
 [  72  198]]
0.8605
