In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
%matplotlib inline

In [None]:
# Load the churn dataset from a CSV file in the working directory.
# The bare `df` on the last line makes the notebook render the frame as the cell output.
df = pd.read_csv('Churn_Modelling.csv')
df

In [None]:
# Feature matrix: the columns at positions 3..12, taken as a NumPy array.
X = df.iloc[:, 3:13].values  # iloc's upper bound is exclusive, hence 12 + 1

In [None]:
# Target vector: the column at position 13, taken as a NumPy array.
y = df.iloc[:, 13].values

In [None]:
# Encode categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Integer-code the two string-valued feature columns in place.
labelEncoder_X_1 = LabelEncoder()
X[:, 1] = labelEncoder_X_1.fit_transform(X[:, 1])         # for country

labelEncoder_X_2 = LabelEncoder()
X[:, 2] = labelEncoder_X_2.fit_transform(X[:, 2])        # for gender

from sklearn.compose import ColumnTransformer
# One-hot encode column 1 (country); remainder='passthrough' keeps every other
# column, placed after the generated dummy columns.
ct = ColumnTransformer([('encoder', OneHotEncoder(), [1])],
                        remainder='passthrough')
# `np.float` was only an alias for the builtin float; it was deprecated in
# NumPy 1.20 and removed in 1.24, so name the concrete dtype instead.
X = np.array(ct.fit_transform(X), dtype=np.float64)

X = X[:, 1:] # avoid the dummy variable trap: drop the first dummy column

In [None]:
# Split the dataset into a training set and a test set.
# test_size=0.2 holds out 20% of the samples; random_state fixes the shuffle.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

> **Feature scaling is essential for neural networks: inputs on very different scales make gradient-based training slow and unstable.**

In [None]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [None]:
# just to draw the neural network in matplotlib

def draw_neural_net(ax, left, right, bottom, top, layer_sizes):
    """Draw a fully connected feed-forward network diagram on ``ax``.

    Each layer is drawn as a vertical column of circles centred on the
    midpoint of ``bottom`` and ``top``; every node is joined by a straight
    line to every node of the following layer.

    Parameters
    ----------
    ax : matplotlib axes
        Axes that receives the circle and line artists (``ax.add_artist``).
    left, right : float
        x-coordinates of the first and the last layer. When there is only
        one layer it is drawn at ``left``.
    bottom, top : float
        Vertical bounds; node spacing is ``(top - bottom) / max(layer_sizes)``.
    layer_sizes : sequence of int
        Number of nodes in each layer, ordered from first to last layer.

    Raises
    ------
    ValueError
        If ``layer_sizes`` is empty.
    """
    n_layers = len(layer_sizes)
    if n_layers == 0:
        raise ValueError("layer_sizes must contain at least one layer")

    v_spacing = (top - bottom)/float(max(layer_sizes))
    # A single layer has no inter-layer gap; guard the division that would
    # otherwise be by zero.
    h_spacing = (right - left)/float(n_layers - 1) if n_layers > 1 else 0.0
    mid_y = (top + bottom)/2.

    # Nodes
    for n, layer_size in enumerate(layer_sizes):
        layer_top = v_spacing*(layer_size - 1)/2. + mid_y
        for m in range(layer_size):
            # zorder=4 keeps the white node discs above the edge lines.
            circle = plt.Circle((n*h_spacing + left, layer_top - m*v_spacing), v_spacing/4.,
                                color='w', ec='k', zorder=4)
            ax.add_artist(circle)

    # Edges: connect every node of layer n to every node of layer n + 1.
    for n, (layer_size_a, layer_size_b) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        layer_top_a = v_spacing*(layer_size_a - 1)/2. + mid_y
        layer_top_b = v_spacing*(layer_size_b - 1)/2. + mid_y
        for m in range(layer_size_a):
            for o in range(layer_size_b):
                line = plt.Line2D([n*h_spacing + left, (n + 1)*h_spacing + left],
                                  [layer_top_a - m*v_spacing, layer_top_b - o*v_spacing], c='k')
                ax.add_artist(line)

                
# Render the architecture used in this notebook on a square, frameless canvas:
# 11 inputs, two hidden layers of 8 nodes, 1 output.
fig, ax = plt.subplots(figsize=(12, 12))
ax.set_axis_off()
draw_neural_net(ax, .1, .9, .1, .9, [11, 8, 8, 1])

In [None]:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense

# Start from an empty Sequential model; layers are appended to it with classifier.add(...).
classifier = Sequential()

> Rule of thumb: no. of hidden neurons (nh) = (2/3) * (no. of input neurons + no. of output neurons)

In [None]:
# Two thirds of (11 input neurons + 1 output neuron), rounded down to an int.
nh = (2 * (11 + 1)) // 3
nh

In [None]:
# Hidden layer 1: nh ReLU units fed by the 11 input features.
classifier.add(
    Dense(
        units=nh,
        activation='relu',
        kernel_initializer='uniform',
        input_dim=11,
    )
)

In [None]:
# Hidden layer 2: another nh ReLU units with the same initializer.
classifier.add(
    Dense(
        units=nh,
        activation='relu',
        kernel_initializer='uniform',
    )
)

In [None]:
# The output layer: a single sigmoid unit, used to get a probabilistic output.
# When dealing with several categories, use one output neuron per category
# and a softmax activation instead.
classifier.add(
    Dense(
        units=1,
        activation="sigmoid",
        kernel_initializer="uniform",
    )
)

In [None]:
# Compiling the ANN
# - optimizer: adam, a stochastic gradient descent algorithm
# - loss: logarithmic loss; binary_crossentropy goes with the single sigmoid output
# - metrics: expects the metrics to track as a list, e.g. ['accuracy']
classifier.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Fit the ANN to the training set and keep the returned History object.
# Passing verbose=0 would silence the per-epoch progress output.
history = classifier.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=20,
)

In [None]:
# Show which series were recorded by fit (the keys used by the plots that follow).
print(history.history.keys())

In [None]:
# summarize history for accuracy: one value per training epoch
plt.plot(history.history['accuracy'])
# plt.plot(history.history['loss'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')

In [None]:
# summarize history for loss: one value per training epoch
# Read from the `history` object returned by fit, the same source the
# accuracy plot uses, rather than from the model's own attribute.
plt.plot(history.history['loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')

In [None]:
# from livelossplot import PlotLossesKeras
# classifier.fit(X_train, y_train, batch_size=10,
#                epochs=100,
#                validation_data=(X_test, y_test),
#                callbacks=[PlotLossesKeras()],
#                verbose=0)

In [None]:
# Predicting the Test set results
# The model ends in a single sigmoid unit, so each prediction is a probability.
y_pred = classifier.predict(X_test)

In [None]:
# threshold to change y_pred from probability to true/false:
# probabilities above 0.5 become True, everything else False
y_pred = (y_pred > 0.5)

In [None]:
# Build the confusion matrix from the true test labels and the thresholded predictions.
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
print(cm)
# Accuracy = correctly classified samples (the matrix diagonal) divided by all
# samples. Computed from `cm` so the figure always matches the current run
# instead of counts typed in from an earlier one.
cm.trace() / cm.sum()

In [None]:
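# roughly 84-86% accuracy on the test set; the exact figure changes between runs because no random seed is set for training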

In [None]:
# Score the trained model on the held-out test set.
result = classifier.evaluate(X_test, y_test, verbose=2)

# Pair each metric name with its value (1st with 1st, 2nd with 2nd, ...).
{name: value for name, value in zip(classifier.metrics_names, result)}
