In [61]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder

import pickle

from tensorflow.python.keras.models import load_model

In [62]:
# Read the dataset from the CSV file into a DataFrame and preview the first five rows.
data = pd.read_csv(filepath_or_buffer="bankmodels.csv")
data.head(5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,Portland,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Salem,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,Portland,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,Portland,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Salem,Female,43,2,125510.82,1,1,1,79084.1,0


In [63]:
## Preprocess the data
### Remove the row-number, customer-id and surname columns; they are not used as features
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])
data


Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,Portland,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Salem,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,Portland,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,Portland,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Salem,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,Portland,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,Portland,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,Portland,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Tigard,Male,42,3,75075.31,2,1,0,92888.52,1


In [64]:
## Encode the categorical 'Gender' column as integer labels
label_encoder_gender = LabelEncoder()
label_encoder_gender.fit(data['Gender'])
data['Gender'] = label_encoder_gender.transform(data['Gender'])
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,Portland,0,42,2,0.00,1,1,1,101348.88,1
1,608,Salem,0,41,1,83807.86,1,0,1,112542.58,0
2,502,Portland,0,42,8,159660.80,3,1,0,113931.57,1
3,699,Portland,0,39,1,0.00,2,0,0,93826.63,0
4,850,Salem,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,Portland,1,39,5,0.00,2,1,0,96270.64,0
9996,516,Portland,1,35,10,57369.61,1,1,1,101699.77,0
9997,709,Portland,0,36,7,0.00,1,0,1,42085.58,1
9998,772,Tigard,1,42,3,75075.31,2,1,0,92888.52,1


In [65]:
## One-hot encode 'Geography'
from sklearn.preprocessing import OneHotEncoder

# Fit on a single-column DataFrame, then materialise the sparse result as a dense array.
onehot_encoder_geo = OneHotEncoder()
onehot_encoder_geo.fit(data[['Geography']])
geo_encoder = onehot_encoder_geo.transform(data[['Geography']]).toarray()
geo_encoder

array([[1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.]])

In [66]:
# Show the column names the fitted encoder produces for the 'Geography' feature.
onehot_encoder_geo.get_feature_names_out(['Geography'])

array(['Geography_Portland', 'Geography_Salem', 'Geography_Tigard'],
      dtype=object)

In [67]:
# Wrap the dense one-hot matrix in a DataFrame labelled with the encoder's feature names.
geo_feature_names = onehot_encoder_geo.get_feature_names_out(['Geography'])
geo_encoded_df = pd.DataFrame(data=geo_encoder, columns=geo_feature_names)
geo_encoded_df

Unnamed: 0,Geography_Portland,Geography_Salem,Geography_Tigard
0,1.0,0.0,0.0
1,0.0,1.0,0.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,1.0,0.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,0.0,1.0


In [68]:
## Replace the raw 'Geography' column with its one-hot encoded columns
data_without_geo = data.drop(columns='Geography')
data = pd.concat([data_without_geo, geo_encoded_df], axis='columns')
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_Portland,Geography_Salem,Geography_Tigard
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,1.0,0.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,1.0,0.0


In [69]:
## Save the fitted encoders (the scaler is pickled separately, after it has been fitted)
for pickle_path, fitted_encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_geo.pkl', onehot_encoder_geo),
):
    with open(pickle_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)

In [70]:
# Preview the encoded frame: 'Gender' is label-encoded and 'Geography' is replaced by one-hot columns.
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_Portland,Geography_Salem,Geography_Tigard
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,1.0,0.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,1.0,0.0


In [71]:
## Divide the dataset into independent (X) and dependent (y) features
y = data['Exited']
X = data.drop(columns='Exited')


In [72]:
## Split the data into training and testing sets (20% held out, fixed random seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [73]:
## Scale the features; the scaler is fitted on the training split only
# NOTE: X_train and X_test are rebound to the scaled arrays here, so re-running
# this cell on its own would fit the scaler on already-scaled data.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [74]:
# Inspect the scaled training features (now an array rather than a DataFrame).
X_train

array([[ 0.35649971,  0.91324755, -0.6557859 , ...,  1.00150113,
        -0.57638802, -0.57946723],
       [-0.20389777,  0.91324755,  0.29493847, ..., -0.99850112,
        -0.57638802,  1.72572313],
       [-0.96147213,  0.91324755, -1.41636539, ..., -0.99850112,
         1.73494238, -0.57946723],
       ...,
       [ 0.86500853, -1.09499335, -0.08535128, ...,  1.00150113,
        -0.57638802, -0.57946723],
       [ 0.15932282,  0.91324755,  0.3900109 , ...,  1.00150113,
        -0.57638802, -0.57946723],
       [ 0.47065475,  0.91324755,  1.15059039, ..., -0.99850112,
        -0.57638802,  1.72572313]])

In [75]:
# Persist the fitted scaler so the same scaling can be re-applied to new inputs.
with open('scaler.pkl', mode='wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [76]:
# Display the preprocessed frame (the rendered output elides the middle rows).
data

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_Portland,Geography_Salem,Geography_Tigard
0,619,0,42,2,0.00,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,1.0,0.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0.0,0.0,1.0


<h2>ANN Implementation</h2>

In [77]:
# Import TensorFlow and print its version so the environment used for this run is recorded.
import tensorflow as tf
print(tf.__version__)
# NOTE(review): `os` is not referenced by any of the visible cells.
import os


2.18.0


In [78]:
# Number of feature columns wrapped in a 1-tuple: the per-sample input shape for the network.
(X_train.shape[1],)

(12,)

In [79]:
# Per-sample input shape: one entry for each feature column of the training matrix.
n_features = X_train.shape[1]
input_shape = (n_features,)

In [80]:
# Build our ANN model: two ReLU hidden layers followed by a single sigmoid
# output unit, giving a probability for the binary 'Exited' target.
# The input shape is declared with an explicit Input object as the first entry
# instead of the `input_shape=` keyword on the first Dense layer; recent Keras
# versions emit a UserWarning for that keyword on layers in a Sequential model.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),       ## one value per feature column
    tf.keras.layers.Dense(64, activation='relu'),    ## HL1
    tf.keras.layers.Dense(32, activation='relu'),    ## HL2
    tf.keras.layers.Dense(1, activation='sigmoid'),  ## output layer
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [81]:
# Print the model summary to check the architecture before compiling.
model.summary()

In [83]:
## Compile the model
# Adam optimizer with an explicit learning rate; binary cross-entropy matches
# the single sigmoid output unit.
adam_options = {"learning_rate": 0.01}
opt = tf.keras.optimizers.Adam(**adam_options)

compile_options = dict(
    optimizer=opt,
    loss="binary_crossentropy",
    metrics=['accuracy'],
)
model.compile(**compile_options)

In [84]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available in this kernel
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [85]:

import datetime

# Give each training run its own timestamped log directory under logs/fit/.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp
tensorflow_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)


In [86]:
## Set up early stopping
# Watch the validation loss with a patience of 10 epochs and restore the best
# weights when training stops.
early_stopping_options = {
    'monitor': 'val_loss',
    'patience': 10,
    'restore_best_weights': True,
}
early_stopping_callback = tf.keras.callbacks.EarlyStopping(**early_stopping_options)


In [87]:
# Train the model for up to 100 epochs, logging to TensorBoard and stopping early
# when the monitored validation loss stops improving.
# NOTE(review): the test split is passed as the validation data, so it drives
# early stopping and is no longer an untouched hold-out set.
training_callbacks = [tensorflow_callback, early_stopping_callback]
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=training_callbacks,
)



Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 694us/step - accuracy: 0.8144 - loss: 0.4352 - val_accuracy: 0.8555 - val_loss: 0.3534
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 497us/step - accuracy: 0.8543 - loss: 0.3538 - val_accuracy: 0.8475 - val_loss: 0.3610
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 516us/step - accuracy: 0.8636 - loss: 0.3426 - val_accuracy: 0.8530 - val_loss: 0.3520
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 510us/step - accuracy: 0.8593 - loss: 0.3436 - val_accuracy: 0.8550 - val_loss: 0.3487
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 507us/step - accuracy: 0.8551 - loss: 0.3441 - val_accuracy: 0.8605 - val_loss: 0.3489
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 507us/step - accuracy: 0.8610 - loss: 0.3379 - val_accuracy: 0.8535 - val_loss: 0.3544
Epoch 7/10

In [88]:
# Save the trained model to 'model.h5'; the same path is read back by the
# load_model call in the inference section.
# NOTE(review): recent Keras versions recommend the native '.keras' format over
# HDF5 — confirm before switching, and update the load path at the same time.
model.save('model.h5')



In [89]:
## Load Tensorboard Extension
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [90]:
# Open TensorBoard inline, pointed at the parent directory of the timestamped run logs.
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6006 (pid 1551), started 0:08:46 ago. (Use '!kill 1551' to kill it.)

In [91]:

### Load the trained model and the fitted preprocessing objects
model = tf.keras.models.load_model('model.h5')

## Load the encoders and the scaler
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# pickle files that this notebook produced or that come from a trusted source.
with open('onehot_encoder_geo.pkl', 'rb') as file:
    onehot_encoder_geo = pickle.load(file)

# The object above is the one-hot encoder for 'Geography', not a label encoder.
# The older name is kept as an alias because the following cells refer to it.
label_encoder_geo = onehot_encoder_geo

with open('label_encoder_gender.pkl', 'rb') as file:
    label_encoder_gender = pickle.load(file)

with open('scaler.pkl', 'rb') as file:
    scaler = pickle.load(file)





In [92]:
# Example input data: one record with raw (unencoded) feature values.
# Key order is preserved, so the resulting frame has its columns in this order.
input_data = dict(
    CreditScore=600,
    Geography='Portland',
    Gender='Male',
    Age=40,
    Tenure=3,
    Balance=60000,
    NumOfProducts=2,
    HasCrCard=1,
    IsActiveMember=1,
    EstimatedSalary=50000,
)

In [93]:
# One-hot encode 'Geography'
# The encoder was fitted on a DataFrame with a 'Geography' column, so the value
# is passed as a one-row DataFrame with that same column name. A bare nested
# list carries no feature names and makes scikit-learn warn about the mismatch.
geo_input = pd.DataFrame({'Geography': [input_data['Geography']]})
geo_encoded = label_encoder_geo.transform(geo_input).toarray()
geo_encoded_df = pd.DataFrame(
    geo_encoded,
    columns=label_encoder_geo.get_feature_names_out(['Geography']),
)
geo_encoded_df



Unnamed: 0,Geography_Portland,Geography_Salem,Geography_Tigard
0,1.0,0.0,0.0


In [97]:
# Build a one-row DataFrame from the example record.
example_records = [input_data]
input_df = pd.DataFrame(example_records)
input_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,600,Portland,Male,40,3,60000,2,1,1,50000


In [98]:
## Encode the categorical 'Gender' value with the fitted label encoder
gender_codes = label_encoder_gender.transform(input_df['Gender'])
input_df['Gender'] = gender_codes
input_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,600,Portland,1,40,3,60000,2,1,1,50000


In [99]:
## Concatenate the one-hot encoded 'Geography' columns onto the input row
input_without_geo = input_df.drop(columns="Geography")
input_df = pd.concat([input_without_geo, geo_encoded_df], axis='columns')
input_df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_Portland,Geography_Salem,Geography_Tigard
0,600,1,40,3,60000,2,1,1,50000,1.0,0.0,0.0


In [100]:
## Scaling the input data
# Select the columns in exactly the order the scaler was fitted with, so the
# result does not depend on the key order of the input dictionary. A missing
# feature surfaces as a KeyError naming the absent column.
input_scaled = scaler.transform(input_df[list(scaler.feature_names_in_)])
input_scaled

array([[-0.53598516,  0.91324755,  0.10479359, -0.69539349, -0.25781119,
         0.80843615,  0.64920267,  0.97481699, -0.87683221,  1.00150113,
        -0.57638802, -0.57946723]])

In [101]:
## Predict churn
# The model ends in a single sigmoid unit, so the result holds one probability per input row.
prediction = model.predict(x=input_scaled)
prediction

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


array([[0.03674701]], dtype=float32)

In [102]:
# Pull the single scalar out of the (1, 1) prediction array.
prediction_proba = prediction[0, 0]

In [103]:
# Display the predicted probability for the example input.
prediction_proba

0.036747012

In [104]:
# Turn the predicted probability into a message using a 0.5 threshold.
message = (
    'The customer is likely to leave.'
    if prediction_proba > 0.5
    else 'The customer is likely to stay.'
)
print(message)

The customer is likely to stay.
