In [15]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab only) so the dataset stored there can be read by file path.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [17]:
# Read the churn dataset from the mounted Drive folder and preview the first five rows.
churn_csv_path = "/content/drive/My Drive/Churn_Modelling.csv"
dataset = pd.read_csv(churn_csv_path)
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


Data Preprocessing

In [18]:
# Features: skip the first three columns (RowNumber, CustomerId, Surname), which carry no
# signal about churn, and stop before the last column.
# Target: the last column (Exited).
feature_columns = dataset.iloc[:, 3:-1]
target_column = dataset.iloc[:, -1]
X = feature_columns.values
y = target_column.values

In [19]:
# Label Encoding Gender column.
from sklearn.preprocessing import LabelEncoder
# Gender is the third feature column (index 2) of X; replace its string labels with integer codes.
# NOTE(review): which integer each gender maps to is decided by the encoder (see le.classes_) — confirm before hand-building inputs.
gender_col = 2
le = LabelEncoder()
gender_codes = le.fit_transform(X[:, gender_col])
X[:, gender_col] = gender_codes

In [20]:
# OneHotEncoding Geography column.
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder

# Geography sits at index 1 of X: one-hot encode it and pass every other column through unchanged.
# The one-hot columns come first in the result, followed by the passthrough columns.
#
# sparse_threshold=0 makes the ColumnTransformer always return a dense array. This replaces
# OneHotEncoder(sparse=False): the `sparse` keyword was renamed to `sparse_output` in
# scikit-learn 1.2 and later removed, so passing it raises a TypeError on current releases.
#
# Run this cell once per freshly built X: it overwrites X, so re-running it would one-hot
# encode whatever column is at index 1 of the already-transformed array.
ct = make_column_transformer(
    (OneHotEncoder(), [1]),
    remainder="passthrough",
    sparse_threshold=0,
)
X = ct.fit_transform(X)

In [21]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)

In [22]:
# Performing feature scaling of all the columns as feature scaling is very important for NN.
# Perform feature scaling of all the columns even if it contains only 0 and 1.
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training split only, then apply that same fitted
# scaler to both splits. The test split is never used for fitting, which avoids data leakage.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

Building ANN

In [23]:
# Seed NumPy and TensorFlow before any layer is created so that weight initialisation
# (and any RNG-driven shuffling during training) starts from the same state on every
# Restart & Run All. 101 matches the random_state used for the train/test split.
# Bit-for-bit identical results can still vary with hardware and TensorFlow version.
np.random.seed(101)
tf.random.set_seed(101)

# Empty Sequential container; the layers are appended to it one at a time.
ann = tf.keras.models.Sequential()

In [24]:
# `units` is the number of neurons in a layer (not the number of layers). Six per layer is an
# arbitrary starting point: there is no rule of thumb, so both the layer width and the number
# of hidden layers should be settled by hyperparameter tuning.

# Two fully connected hidden layers with ReLU activation.
hidden_layer_units = (6, 6)
for n_units in hidden_layer_units:
    ann.add(tf.keras.layers.Dense(units=n_units, activation="relu"))

In [25]:
# Output layer: a single unit, because the target is binary (churn / no churn).
# The sigmoid activation turns the output into a probability, so we get not only a 0/1
# decision (after thresholding) but also how confident the network is.
# For a multi-class target (e.g. three classes A, B, C) use one unit per class and a
# softmax activation instead of sigmoid.

output_layer = tf.keras.layers.Dense(units=1, activation="sigmoid")
ann.add(output_layer)

In [26]:
# binary_crossentropy is the loss to use for a binary classification problem.
# With more than two classes, categorical_crossentropy would be used instead.

ann.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [27]:
# batch_size already defaults to 32; it is spelled out here because it is one of the
# values worth tuning during hyperparameter search.

ann.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f4e22a8f6d8>

Predicting using our trained ANN

In [28]:
# Notes on predicting for a single customer:
# 1. The input must always be 2-D, i.e. a list of rows: [[...]].
# 2. Geography was one-hot encoded, so "France" cannot be passed directly; it is supplied as 1, 0, 0.
# 3. The network was trained on scaled features, not the raw values, so the same fitted
#    scaler (sc) has to be applied to the input before calling predict.

# A cleaner approach is to wrap every encoding/scaling step together with the model in one
# pipeline, call .fit on the pipeline, and call .predict on the pipeline itself. Incoming
# data then never needs to be preprocessed separately by hand.

new_customer = [[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]]
new_customer_scaled = sc.transform(new_customer)
ann.predict(new_customer_scaled)

array([[0.02763263]], dtype=float32)

In [33]:
# Predicted probabilities for the test split, turned into boolean labels with a 0.5 cut-off.
test_probabilities = ann.predict(X_test)
y_predict = test_probabilities > 0.5
y_predict

array([[False],
       [False],
       [ True],
       ...,
       [False],
       [False],
       [ True]])

In [35]:
from sklearn.metrics import confusion_matrix , accuracy_score
# Confusion matrix on the test split: rows are the true labels, columns the predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_predict)
cm

array([[1515,   62],
       [ 205,  218]])

In [36]:
# Fraction of test rows whose thresholded prediction matches the true label.
accuracy_score(y_true=y_test, y_pred=y_predict)

0.8665