In [53]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

% matplotlib inline

In [54]:
# Load the churn dataset from a CSV file located in the working directory.
churn_data = pd.read_csv('Churn_Modelling.csv')

In [55]:
# Preview the first rows to check that the columns were parsed as expected.
churn_data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [56]:
# Summarise column dtypes and non-null counts to spot missing values.
churn_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
RowNumber          10000 non-null int64
CustomerId         10000 non-null int64
Surname            10000 non-null object
CreditScore        10000 non-null int64
Geography          10000 non-null object
Gender             10000 non-null object
Age                10000 non-null int64
Tenure             10000 non-null int64
Balance            10000 non-null float64
NumOfProducts      10000 non-null int64
HasCrCard          10000 non-null int64
IsActiveMember     10000 non-null int64
EstimatedSalary    10000 non-null float64
Exited             10000 non-null int64
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


### Data preprocessing

Separate the independent variables (features) from the dependent variable (target)

In [57]:
# Features: every column after the three identifier fields (RowNumber,
# CustomerId, Surname) up to, but excluding, the final `Exited` column.
# .copy() detaches X from churn_data, so the encoding steps that assign into X
# work on an independent frame rather than on a slice of the raw data
# (avoids pandas' SettingWithCopyWarning and keeps churn_data untouched).
X = churn_data.iloc[:, 3:-1].copy()
# Target: the last column (`Exited`).
y = churn_data.iloc[:, -1]

In [58]:
# Confirm the feature frame holds only the predictor columns.
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [59]:
# Confirm the target series is the `Exited` label.
y.head()

0    1
1    0
2    1
3    0
4    0
Name: Exited, dtype: int64

Encode the categorical data

In [60]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Integer-encode the two string-valued columns (Geography is column 1 and
# Gender is column 2 of X). Assigning by column label replaces each column
# outright, so the result gets an integer dtype; a positional
# `X.iloc[:, i] = ...` assignment writes into the existing object column and,
# on recent pandas releases, leaves its dtype as object.
Label_X_1 = LabelEncoder()
X['Geography'] = Label_X_1.fit_transform(X['Geography'])
Label_X_2 = LabelEncoder()
X['Gender'] = Label_X_2.fit_transform(X['Gender'])

In [61]:
# One-hot encode Geography with pandas: build the indicator columns, append
# them to the feature frame, then discard the original encoded column.
one_hot_geo = pd.get_dummies(X['Geography'], prefix='Geography')
X1 = X.join(one_hot_geo).drop('Geography', axis=1)

In [62]:
# Check the frame after one-hot encoding Geography.
X1.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_0,Geography_1,Geography_2
0,619,0,42,2,0.0,1,1,1,101348.88,1,0,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0,1
2,502,0,42,8,159660.8,3,1,0,113931.57,1,0,0
3,699,0,39,1,0.0,2,0,0,93826.63,1,0,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0,1


In [63]:
# Using one hot encoding from sklearn.
# OneHotEncoder's `categorical_features` argument was deprecated in
# scikit-learn 0.20 and removed in 0.22; selecting the column through a
# ColumnTransformer (available since 0.20) is the supported replacement.
from sklearn.compose import ColumnTransformer

one_hot_geo_2 = ColumnTransformer(
    transformers=[('geography', OneHotEncoder(), [1])],  # column 1 = Geography
    remainder='passthrough',  # keep the other columns, placed after the dummies
    sparse_threshold=0,       # always return a dense array (replaces .toarray())
)
X_2 = one_hot_geo_2.fit_transform(X)

In [64]:
# Keep only two of the three Geography indicator columns; the dropped one is
# implied by the other two (presumably done to avoid the dummy-variable trap).
X1 = X1.drop('Geography_2', axis=1)

In [65]:
# Check that only two Geography dummy columns remain.
X1.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_0,Geography_1
0,619,0,42,2,0.0,1,1,1,101348.88,1,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,0
3,699,0,39,1,0.0,2,0,0,93826.63,1,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0


### Prepare training and testing set for the model training

In [66]:
# Count the rows per class to see how balanced the target is.
y.value_counts()

0    7963
1    2037
Name: Exited, dtype: int64

Given that the classes of the dependent variable are not uniformly distributed (about 80% vs. 20%), we will use a stratified split

In [67]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for testing. Stratifying on y keeps the class ratio
# the same in both splits, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X1,
    y,
    test_size=0.1,
    stratify=y,
    random_state=0,
)

In [68]:
# Peek at the held-out rows produced by the split.
X_test.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_0,Geography_1
2883,559,0,38,8,95139.41,1,1,1,86575.46,1,0
815,745,1,25,5,157993.15,2,1,0,146041.45,1,0
5395,589,1,48,5,126111.61,1,0,1,133961.19,0,1
6400,676,0,30,5,0.0,2,0,0,179066.58,0,0
8529,603,1,45,9,0.0,1,0,0,148516.79,1,0


Feature scaling is critical for training the deep neural network, so it is applied to every feature, including the 0/1 categorical variables

In [69]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so the test set does not influence them.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

### Build Artificial Neural Network

In [70]:
import keras
from keras.models import Sequential
from keras.layers import Dense

Initialize the ANN: a Keras model can be defined either as a sequence of layers or as a graph; here we use the first approach (a sequence of layers)

In [71]:
# Assemble the ANN from a single layer list: two 6-unit ReLU hidden layers
# (the first also declares the 11 input features) followed by a 1-unit
# sigmoid output layer.
classifier = Sequential([
    Dense(units=6, activation="relu", kernel_initializer="uniform", input_dim=11),
    Dense(units=6, activation="relu", kernel_initializer="uniform"),
    Dense(units=1, activation="sigmoid", kernel_initializer="uniform"),
])

classifier.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_727 (Dense)            (None, 6)                 72        
_________________________________________________________________
dense_728 (Dense)            (None, 6)                 42        
_________________________________________________________________
dense_729 (Dense)            (None, 1)                 7         
Total params: 121
Trainable params: 121
Non-trainable params: 0
_________________________________________________________________


In [72]:
# Configure training: Adam optimizer, binary cross-entropy loss (the model has
# a single sigmoid output), with accuracy reported as the metric.
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [73]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Train with 10% of the training rows held out for per-epoch validation.
# `epochs` is the Keras 2 keyword for the number of passes (the legacy
# `nb_epoch` alias is deprecated) and is the keyword the KerasClassifier
# cells in this notebook already use.
classifier.fit(X_train, y_train, batch_size=32, epochs=100, validation_split=0.1)

# The network emits one probability per row with shape (n, 1): threshold at
# 0.5, convert to 0/1 labels and flatten to 1-D to match y_test.
y_pred = (classifier.predict(X_test) > 0.5).astype(int).ravel()
print(accuracy_score(np.array(y_test), y_pred))

# Rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_test, y_pred)
cm



Train on 8100 samples, validate on 900 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100


array([[778,  18],
       [154,  50]])

### Perform cross validation using keras wrapping for sklearn

In [74]:
from keras.wrappers.scikit_learn import KerasClassifier

In [75]:
from sklearn.model_selection import cross_val_score

In [79]:
def build_classifier():
    """Return a freshly compiled ANN for use with the scikit-learn wrapper.

    Architecture: two 6-unit ReLU hidden layers over 11 input features and a
    single sigmoid output unit, all using the 'uniform' kernel initializer.
    The model is compiled with the Adam optimizer, binary cross-entropy loss
    and the accuracy metric.
    """
    layers = [
        Dense(units=6, activation='relu', kernel_initializer='uniform', input_dim=11),
        Dense(units=6, activation='relu', kernel_initializer='uniform'),
        Dense(units=1, activation='sigmoid', kernel_initializer='uniform'),
    ]
    model = Sequential(layers)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [81]:
# Wrap the model factory so scikit-learn can build a fresh network per fold,
# then score it with 10-fold cross-validation on the training split.
# n_jobs=-1 asks scikit-learn to run the folds on all available cores.
# NOTE(review): the saved output of this cell ends in a KeyboardInterrupt,
# so the run recorded here did not complete.
classifier = KerasClassifier(build_fn=build_classifier, batch_size=32, epochs=100)
accuracies = cross_val_score(
    estimator=classifier,
    X=X_train,
    y=y_train,
    cv=10,
    n_jobs=-1,
)

Exception ignored in: <function WeakValueDictionary.__init__.<locals>.remove at 0x7ff535550510>
Traceback (most recent call last):
  File "/usr/lib/python3.5/weakref.py", line 108, in remove
    def remove(wr, selfref=ref(self)):
KeyboardInterrupt


Epoch 1/100
Epoch 1/100
Epoch 1/100
Epoch 1/100


KeyboardInterrupt: 

In [26]:
# Show the per-fold accuracy scores.
accuracies 

array([ 0.83444444,  0.82555556,  0.83222222,  0.84      ,  0.83111111,
        0.81444444,  0.83222222,  0.82888889,  0.82666667,  0.84888889])

In [27]:
# Summarise the cross-validation scores: the mean estimates the accuracy and
# the standard deviation shows how much it varies from fold to fold.
acc_mean, acc_std = accuracies.mean(), accuracies.std()

In [45]:
# If overfitting, i.e., high variance, we will implement dropout
from keras.layers import Dropout

def build_ann_drop(hidden_u=6, opt='adam', drop_rate=0.1):
    """Build and compile the churn ANN with dropout after each hidden layer.

    Parameters
    ----------
    hidden_u : int
        Number of units in each of the two hidden layers.
    opt : str or optimizer instance
        Optimizer handed to ``compile``.
    drop_rate : float
        Fraction of hidden-layer outputs dropped during training. Defaults to
        0.1, the value that was previously hard-coded.

    Returns
    -------
    The compiled Sequential model: 11 input features, two ReLU hidden layers
    each followed by Dropout, and a single sigmoid output unit.
    """
    # Use the `Sequential` name imported from keras.models, as the other
    # model-building cells in this notebook do.
    classifier = Sequential()
    classifier.add(Dense(units=hidden_u, activation='relu', kernel_initializer='uniform', input_dim=11))
    classifier.add(Dropout(rate=drop_rate))
    classifier.add(Dense(units=hidden_u, activation='relu', kernel_initializer='uniform'))
    classifier.add(Dropout(rate=drop_rate))
    classifier.add(Dense(units=1, activation='sigmoid', kernel_initializer='uniform'))
    classifier.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

    return classifier

# Wrap the dropout model factory for scikit-learn; batch size and epoch count
# set here are the defaults used when fitting.
classifier_drop = KerasClassifier(
    build_fn=build_ann_drop,
    batch_size=32,
    epochs=10,
)

In [38]:
# 10-fold cross-validation of the dropout model on the training split,
# with the folds run on all available cores.
accuracies_drop = cross_val_score(
    estimator=classifier_drop,
    X=X_train,
    y=y_train,
    cv=10,
    n_jobs=-1,
)



Epoch 1/10
Epoch 1/10
Epoch 2/10
Epoch 2/10
Epoch 3/10
Epoch 3/10
Epoch 4/10
Epoch 4/10
Epoch 5/10
Epoch 5/10
Epoch 6/10
Epoch 6/10
Epoch 7/10
Epoch 7/10
Epoch 8/10
  32/8100 [..............................] - ETA: 0s - loss: 0.4061 - acc: 0.8125Epoch 8/10
Epoch 9/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 1/10
Epoch 2/10
Epoch 2/10
Epoch 3/10
Epoch 3/10
Epoch 4/10
Epoch 4/10
Epoch 5/10
Epoch 5/10
Epoch 6/10
Epoch 6/10
Epoch 7/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 9/10
Epoch 10/10
Epoch 10/10
Epoch 1/10
Epoch 1/10
Epoch 2/10
Epoch 2/10
Epoch 3/10
Epoch 3/10
Epoch 4/10
Epoch 4/10
Epoch 5/10
Epoch 5/10
Epoch 6/10
Epoch 6/10
Epoch 7/10
Epoch 7/10
Epoch 8/10
Epoch 8/10
Epoch 9/10
Epoch 9/10
Epoch 10/10
Epoch 10/10
Epoch 1/10
Epoch 1/10
Epoch 2/10
Epoch 2/10
Epoch 3/10
Epoch 3/10
Epoch 4/10
Epoch 4/10
Epoch 5/10
Epoch 5/10
Epoch 7/10
Epoch 7/10
Epoch 8/10
Epoch 8/10
Epoch 9/10
Epoch 9/10
Epoch 10/10
Epoch 10/10
Epoch 1/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 4/10
Epoch 5/10

In [39]:
# Show the per-fold accuracy scores of the dropout model.
accuracies_drop

array([ 0.79222222,  0.79111111,  0.80888889,  0.79555556,  0.82888889,
        0.81333333,  0.85666667,  0.81666667,  0.79333333,  0.81444444])

### Hyperparameter tuning

In [40]:
from sklearn.model_selection import GridSearchCV

In [43]:
# Hyperparameter grid to search. `batch_size` and `epochs` are training
# settings of the wrapper; `hidden_u` and `opt` are arguments of
# build_ann_drop. 2 x 3 x 2 x 2 = 24 combinations in total.
parameters = dict(
    batch_size=[32, 64],
    epochs=[10, 20, 50],
    hidden_u=[6, 12],
    opt=['Adam', 'rmsprop'],
)

In [51]:
# Exhaustive search over `parameters`, ranking each combination by its
# 10-fold cross-validated accuracy.
grid_search = GridSearchCV(
    classifier_drop,
    parameters,
    scoring='accuracy',
    cv=10,
)

In [None]:
# Fit grid search to the training set. fit() trains in place and returns the
# same object, so the result does not need to be reassigned.
grid_search.fit(X_train, y_train)
best_parameters = grid_search.best_params_
# best_score_ is already a single number (the mean cross-validated score of
# the best combination), so no further averaging is needed.
best_accuracy = grid_search.best_score_

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E