In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb

In [2]:
Churn_data=pd.read_csv("D:\Study\Python\scripts\Deep_Learning\Banking_Customer_Churn_Modeling\Churn_Modelling.csv")

In [3]:
Churn_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [4]:
Churn_data.shape

(10000, 14)

In [5]:
Churn_data.isnull().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [6]:
Churn_data.duplicated().sum()

0

In [7]:
Churn_data['Geography'].value_counts()

France     5014
Germany    2509
Spain      2477
Name: Geography, dtype: int64

In [8]:
Churn_data['Gender'].value_counts()

Male      5457
Female    4543
Name: Gender, dtype: int64

In [9]:
Churn_data['Gender']=pd.get_dummies(Churn_data['Gender'], drop_first=True)

In [10]:
Geography=pd.get_dummies(Churn_data['Geography'], drop_first=True)

In [11]:
Churn_data=pd.concat([Churn_data, Geography], axis=1)

In [12]:
Churn_data=Churn_data.drop(['Geography'], axis=1)

In [13]:
Churn_data=Churn_data.drop(['RowNumber','CustomerId','Surname'], axis=1)

In [14]:
Churn_data.head(10)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Germany,Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,0,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0,1
2,502,0,42,8,159660.8,3,1,0,113931.57,1,0,0
3,699,0,39,1,0.0,2,0,0,93826.63,0,0,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0,1
5,645,1,44,8,113755.78,2,1,0,149756.71,1,0,1
6,822,1,50,7,0.0,2,1,1,10062.8,0,0,0
7,376,0,29,4,115046.74,4,1,0,119346.88,1,1,0
8,501,1,44,4,142051.07,2,0,1,74940.5,0,0,0
9,684,1,27,2,134603.88,1,1,1,71725.73,0,0,0


In [15]:
X=Churn_data.drop(['Exited'], axis=1)
Y=Churn_data['Exited']

In [16]:
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
X=scaler.fit_transform(X)

In [17]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y, test_size=0.2, random_state=0)

# Hyperparameter Tuning using kerastuner to decide number of Neurons,hidden layers, learning rate for binary classification problem:

In [18]:
from tensorflow import keras
from keras.models import Sequential
from keras import layers
from kerastuner.tuners import RandomSearch

Using TensorFlow backend.


In [19]:
def build_model(hp):
    model = keras.Sequential()
    for i in range(hp.Int('num_layers', 2, 20)):
        model.add(layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=32,
                                            max_value=512,
                                            step=32),
                               activation='relu'))
    model.add(layers.Dense(1, activation='softmax'))
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='binary_crossentropy',
        metrics=['accuracy'])
    return model

In [20]:
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    directory='Project',
    project_name='Banking_Customer_churn_Modelling')

INFO:tensorflow:Reloading Oracle from existing project Project\Banking_Customer_churn_Modelling\oracle.json
INFO:tensorflow:Reloading Tuner from Project\Banking_Customer_churn_Modelling\tuner0.json


In [21]:
tuner.search_space_summary()

Search space summary
Default search space size: 19
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 20, 'step': 1, 'sampling': None}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}
units_2 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
units_3 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
units_4 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
units_5 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
units_6 (Int)
{'default': None, 'co

In [68]:
# lets fit the model:

In [22]:
tuner.search(X, Y,
             epochs=5,
             validation_data=(X_test, Y_test))

INFO:tensorflow:Oracle triggered exit


In [23]:
tuner.results_summary()

Results summary
Results in Project\Banking_Customer_churn_Modelling
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
num_layers: 2
units_0: 224
units_1: 480
learning_rate: 0.001
units_2: 96
units_3: 416
units_4: 192
units_5: 416
units_6: 256
units_7: 192
units_8: 352
units_9: 64
units_10: 288
units_11: 416
units_12: 256
units_13: 128
units_14: 128
units_15: 352
units_16: 352
Score: 0.87199999888738
Trial summary
Hyperparameters:
num_layers: 11
units_0: 224
units_1: 64
learning_rate: 0.01
units_2: 32
units_3: 32
units_4: 32
units_5: 32
units_6: 32
units_7: 32
units_8: 32
units_9: 32
units_10: 32
Score: 0.8675000071525574
Trial summary
Hyperparameters:
num_layers: 7
units_0: 512
units_1: 192
learning_rate: 0.01
units_2: 480
units_3: 384
units_4: 96
units_5: 64
units_6: 96
units_7: 448
units_8: 192
units_9: 64
units_10: 160
units_11: 288
units_12: 96
units_13: 128
units_14: 160
units_15: 448
units_16: 448
Score: 0.8668333292007446
Trial