In [4]:
import tensorflow as tf

#loading data
import pandas as pd
#scaling feuture values
from sklearn.preprocessing import StandardScaler
#Encoding target values
from sklearn.preprocessing import LabelEncoder
#shuffle data
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

In [5]:
data = pd.read_csv('datasets/Churn_Modelling.csv')
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [8]:
data = shuffle(data)

In [9]:
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
7350,7351,15713160,Lin,669,Spain,Male,25,7,157228.61,2,1,0,124382.9,0
3850,3851,15775293,Stephenson,680,France,Male,34,3,143292.95,1,1,0,66526.01,0
8924,8925,15808566,Hs?,516,France,Male,46,2,0.0,2,1,1,169122.54,0
6692,6693,15677087,Green,662,France,Female,39,5,138106.75,1,0,0,19596.73,0
9346,9347,15805681,Chamberlain,716,France,Male,41,9,0.0,1,1,1,113267.48,0


## Data Preprocessing

## 1) Checking Nulls

In [13]:
data.isnull().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

## 2) Selecting Features and Labels

In [15]:
X = data.drop(labels=['CustomerId','RowNumber','Surname','Exited'],axis=1)
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
7350,669,Spain,Male,25,7,157228.61,2,1,0,124382.9
3850,680,France,Male,34,3,143292.95,1,1,0,66526.01
8924,516,France,Male,46,2,0.0,2,1,1,169122.54
6692,662,France,Female,39,5,138106.75,1,0,0,19596.73
9346,716,France,Male,41,9,0.0,1,1,1,113267.48


In [16]:
y = data['Exited']
y.head()

7350    0
3850    0
8924    0
6692    0
9346    0
Name: Exited, dtype: int64

## 3) Encoding Categorical Columns

In [23]:
label = LabelEncoder()
X['Geography'] = label.fit_transform(X['Geography'])
X['Gender'] = label.fit_transform(X['Gender'])

In [26]:
X =pd.get_dummies(X,drop_first=True,columns=['Geography'])

{0, 1, 2}

## 4) Scaling Numerical Values

In [28]:
scaler = StandardScaler()
X = scaler.fit_transform(X)


array([[ 0.19111736,  1.51506738,  0.91241915, ...,  0.64609167,
        -1.03067011,  0.42242509],
       [ 0.3049319 , -0.90188624,  0.91241915, ...,  0.64609167,
        -1.03067011, -0.58364842],
       [-1.39193932, -0.90188624,  0.91241915, ...,  0.64609167,
         0.97024255,  1.20040272],
       ...,
       [ 1.06024652, -0.90188624,  0.91241915, ..., -1.54776799,
         0.97024255,  1.66019311],
       [ 0.44978676, -0.90188624, -1.09598752, ..., -1.54776799,
        -1.03067011,  0.21262662],
       [ 0.3049319 ,  0.30659057, -1.09598752, ...,  0.64609167,
        -1.03067011,  0.68631534]])

## Creating Training and Testing Datasets

In [49]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3)

## Defining ANN

In [55]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128,activation='relu',input_dim=X_train.shape[1]),
    tf.keras.layers.Dense(64,activation='relu'),
    tf.keras.layers.Dense(32,activation='relu'),
    tf.keras.layers.Dense(1,activation='sigmoid')
])

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 128)               1408      
                                                                 
 dense_9 (Dense)             (None, 64)                8256      
                                                                 
 dense_10 (Dense)            (None, 32)                2080      
                                                                 
 dense_11 (Dense)            (None, 1)                 33        
                                                                 
Total params: 11,777
Trainable params: 11,777
Non-trainable params: 0
_________________________________________________________________


In [56]:
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [57]:
!rm -rf ./log/

## Tensorboard Visualization

In [58]:
import datetime,os

logdir = os.path.join("log",datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir,histogram_freq=1)

## Model Training

In [60]:
r = model.fit(X_train,y_train,batch_size=32,validation_data=[X_test,y_test],epochs=50,callbacks=[tensorboard_callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [66]:
%load_ext tensorboard
%tensorboard --logdir ./log

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [67]:
model.evaluate(X_test,y_test)



[0.6151443719863892, 0.824999988079071]

In [88]:
from sklearn.metrics import confusion_matrix
import numpy as np
y_pred = model.predict(X_test)
y_pred=(model.predict(X_test) > 0.5).astype("int32")
cf = confusion_matrix(y_test,y_pred)
cf



array([[2195,  165],
       [ 360,  280]])

In [84]:
from mlxtend import pl

0