In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Load the raw churn dataset from the project-relative data folder.
df = pd.read_csv(filepath_or_buffer='data/Churn_Modelling.csv')

In [3]:
# Preview the first rows to check that the columns were parsed as expected.
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# Target: the 'Exited' flag.
y = df['Exited']

# Features: everything except the identifier columns and the target itself.
X = df.drop(columns=['CustomerId', 'Surname', 'RowNumber', 'Exited'])

In [5]:
# One-hot encode the two categorical columns; each category becomes its own indicator column.
# NOTE: this rebinds X, so re-running the cell without re-running the previous one fails
# (the original 'Geography' / 'Gender' columns are gone after the first run).
X = pd.get_dummies(data=X, columns=['Geography', 'Gender'])
X.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male
0,619,42,2,0.0,1,1,1,101348.88,1,0,0,1,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,1,1,0
2,502,42,8,159660.8,3,1,0,113931.57,1,0,0,1,0
3,699,39,1,0.0,2,0,0,93826.63,1,0,0,1,0
4,850,43,2,125510.82,1,1,1,79084.1,0,0,1,1,0


In [6]:
# Confirm every feature column is numeric after encoding.
X.dtypes

CreditScore            int64
Age                    int64
Tenure                 int64
Balance              float64
NumOfProducts          int64
HasCrCard              int64
IsActiveMember         int64
EstimatedSalary      float64
Geography_France       uint8
Geography_Germany      uint8
Geography_Spain        uint8
Gender_Female          uint8
Gender_Male            uint8
dtype: object

In [8]:
# Split first, then standardise, so the test rows never influence the scaling statistics.

from sklearn.preprocessing import StandardScaler

# 80/20 split, stratified on the target so both parts keep the same class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=0,
)

# Learn mean/std on the training rows only, then apply the same transform to both parts.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# Inspect the standardised training features (a NumPy array after scaling).
X_train

array([[-1.24021723,  0.77986083,  0.35390313, ..., -0.57504086,
         1.09665089, -1.09665089],
       [ 0.75974873, -0.27382717,  0.35390313, ..., -0.57504086,
        -0.91186722,  0.91186722],
       [-1.72725557, -0.9443559 , -0.3390904 , ..., -0.57504086,
         1.09665089, -1.09665089],
       ...,
       [-0.51484098,  0.87565065,  1.39339343, ..., -0.57504086,
        -0.91186722,  0.91186722],
       [ 0.73902369, -0.36961699, -1.03208393, ..., -0.57504086,
         1.09665089, -1.09665089],
       [ 0.95663657, -1.32751517,  0.35390313, ...,  1.73900686,
        -0.91186722,  0.91186722]])

In [10]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout
import tensorflow as tf

In [11]:
# Seed TensorFlow's global RNG so weight initialisation, dropout masks and batch
# shuffling are repeatable between runs (same value as the train/test split seed).
tf.random.set_seed(0)

# Sequential: a plain stack of layers where each layer has exactly one input tensor
# and one output tensor.
model = Sequential()

# Declare the input width with an explicit Input layer instead of the legacy
# `input_dim` argument on the first Dense layer. The width is taken from the array
# that is actually fed to the network.
model.add(Input(shape=(X_train.shape[1],)))

# Densely connected hidden layers. Dropout is applied after the first one only.
model.add(Dense(1000, activation='relu'))
model.add(Dropout(0.5))
for _ in range(4):
    model.add(Dense(1000, activation='relu'))
model.add(Dense(500, activation='relu'))

# Single sigmoid unit: outputs a value in (0, 1) for the binary 'Exited' target.
model.add(Dense(1, activation='sigmoid'))

In [12]:
%%time
model.compile(optimizer='adam',
              loss = 'binary_crossentropy',
              metrics=['accuracy'])

Wall time: 7.54 ms


In [19]:
%%time
model.fit(X_train, y_train.to_numpy(), batch_size = 20, epochs = 5, verbose = 2)

Epoch 1/5
400/400 - 7s - loss: 0.3353 - accuracy: 0.8631 - 7s/epoch - 17ms/step
Epoch 2/5
400/400 - 7s - loss: 0.3310 - accuracy: 0.8658 - 7s/epoch - 17ms/step
Epoch 3/5
400/400 - 7s - loss: 0.3310 - accuracy: 0.8610 - 7s/epoch - 19ms/step
Epoch 4/5
400/400 - 8s - loss: 0.3304 - accuracy: 0.8625 - 8s/epoch - 20ms/step
Epoch 5/5
400/400 - 8s - loss: 0.3286 - accuracy: 0.8649 - 8s/epoch - 20ms/step
Wall time: 36.8 s


<keras.callbacks.History at 0x2250808b9d0>

In [14]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1000)              14000     
                                                                 
 dropout (Dropout)           (None, 1000)              0         
                                                                 
 dense_1 (Dense)             (None, 1000)              1001000   
                                                                 
 dense_2 (Dense)             (None, 1000)              1001000   
                                                                 
 dense_3 (Dense)             (None, 1000)              1001000   
                                                                 
 dense_4 (Dense)             (None, 1000)              1001000   
                                                                 
 dense_5 (Dense)             (None, 500)               5

In [15]:
# Sigmoid outputs for the held-out rows: one value per row, shape (n_rows, 1).
y_preds = model.predict(x=X_test)
y_preds



array([[0.20225593],
       [0.17960888],
       [0.05097181],
       ...,
       [0.06818188],
       [0.98635334],
       [0.13434516]], dtype=float32)

In [16]:
# Keras counterpart of scikit-learn's model.score(features, labels):
# returns [loss, accuracy] on the test set, matching the loss/metrics given to compile().

model.evaluate(x=X_test, y=y_test.to_numpy())



[0.36565062403678894, 0.8475000262260437]

In [17]:
# Inspect the held-out labels (a pandas Series that keeps the original row index).
y_test

1344    1
8167    0
4747    0
5004    1
3124    1
       ..
9107    0
8249    0
8337    0
6279    1
412     0
Name: Exited, Length: 2000, dtype: int64

In [20]:
# Persist the trained model to 'my_model.h5' in the current working directory.
# NOTE(review): the .h5 suffix presumably selects Keras' HDF5 format; confirm this is the intended format.
model.save('my_model.h5')

In [22]:
from tensorflow.keras.models import load_model

In [23]:
# Reload the saved model from disk to check the save/load round trip.
loaded = load_model('my_model.h5')

In [26]:
# Score the test set with the reloaded model.
pred = loaded.predict(X_test)



In [27]:
# Display the reloaded model's predictions (expected to match the in-memory model as it was when saved; TODO confirm).
pred

array([[0.10656578],
       [0.15845482],
       [0.01632589],
       ...,
       [0.05642029],
       [0.9999999 ],
       [0.07931034]], dtype=float32)