# ANN Notebook

#### *Author: Kunyu He*
#### *University of Chicago, CAPP'20*

In [18]:
import pandas as pd
import numpy as np
import keras

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import confusion_matrix

### Load Data

In [2]:
# Read the churn data set from a CSV file in the current working directory,
# then preview the first rows to check that the columns parsed as expected.
churn = pd.read_csv("Churn_Modelling.csv")
churn.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


The bank has seen a high churn rate (customers leaving the bank) and wants to lower it. We predict which customers are likely to leave the bank.

### Data Cleaning

In [3]:
# (number of rows, number of columns) of the loaded table.
churn.shape

(10000, 14)

In [4]:
# Count the missing entries in each column.
churn.isna().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

No values are missing in any column.

### Feature Engineering

In [5]:
# Feature columns run from CreditScore through EstimatedSalary; the leading
# identifier columns (RowNumber, CustomerId, Surname) and the Exited target
# are left out.
X = churn.loc[:, "CreditScore":"EstimatedSalary"].values
X.shape

(10000, 10)

In [6]:
# Target vector: the Exited column as floats.
y = churn["Exited"].values.astype(float)
y.shape

(10000,)

In [7]:
# Rebuild the design matrix from `churn` instead of mutating `X` in place.
# The earlier version overwrote `X`, so running the cell a second time encoded
# the wrong columns without raising an error; this version gives the same
# 11-column result on every run.
features = churn.loc[:, "CreditScore":"EstimatedSalary"]

# Gender -> integer codes.
gender_codes = LabelEncoder().fit_transform(features["Gender"])

# Geography -> one indicator column per category.
dummies = OneHotEncoder(categories="auto").fit_transform(features[["Geography"]]).toarray()

# Replace Gender with its codes, remove the raw Geography column, and put the
# Geography indicators in front. The first indicator column is dropped so that
# one category serves as the baseline.
encoded = features.drop(columns="Geography").assign(Gender=gender_codes)
X = np.column_stack((dummies[:, 1:], encoded.values)).astype(float)

X.shape

(10000, 11)

### Training and Test Set Split

In [8]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

### Feature Scaling

In [9]:
# Fit the scaler on the training rows only, then apply that same fitted scaler
# to both the training and the test features.
sc_X = StandardScaler().fit(X_train)

X_train, X_test = sc_X.transform(X_train), sc_X.transform(X_test)

### Model Training

In [14]:
# Start with an empty Sequential model; its layers are added in the next cell.
ann = Sequential()

Tip: as an initial guess, the number of neurons in each hidden layer can be set to the average of the number of neurons in the input and output layers (here, (11 + 1) / 2 = 6).

In [15]:
# Two 512-unit ReLU hidden layers on the 11 input features, followed by a
# single sigmoid output unit.
for layer in (
    Dense(512, activation="relu", input_shape=(11,)),
    Dense(512, activation="relu"),
    Dense(1, activation="sigmoid"),
):
    ann.add(layer)

In [16]:
# Configure training: Adam optimizer, binary cross-entropy loss (the model has
# a single sigmoid output), and accuracy reported as the metric.
ann.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [17]:
# Train on the scaled training set for 50 epochs with mini-batches of 10 rows.
ann.fit(X_train, y_train, batch_size=10, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x19bf8a82390>

### Model Evaluation

In [26]:
# Threshold the sigmoid outputs at 0.5 to get predicted labels, then tabulate
# them against the true test labels (rows: true class, columns: predicted class).
confusion_matrix(y_test, ann.predict(X_test) > 0.5)

array([[1449,  137],
       [ 221,  193]], dtype=int64)

The model appears to be over-fitting the training set: its accuracy on the test set is only (1449 + 193) / 2000 ≈ 82.1%. Try a less complex model with fewer neurons in the hidden layers.

In [28]:
# A much smaller network: two 6-unit ReLU hidden layers on the 11 input
# features, followed by a single sigmoid output unit.
ann6 = Sequential([
    Dense(6, activation="relu", input_shape=(11,)),
    Dense(6, activation="relu"),
    Dense(1, activation="sigmoid"),
])
ann6.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train for 20 epochs with mini-batches of 10 rows.
ann6.fit(X_train, y_train, epochs=20, batch_size=10)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x19bf9e7def0>

In [29]:
# Threshold the sigmoid outputs at 0.5 to get predicted labels, then tabulate
# them against the true test labels (rows: true class, columns: predicted class).
confusion_matrix(y_test, ann6.predict(X_test) > 0.5)

array([[1527,   59],
       [ 217,  197]], dtype=int64)

The smaller model shows no sign of over-fitting: its test accuracy is (1527 + 197) / 2000 ≈ 86.2%.