In [1]:
import os

In [2]:
# Make sure the top-level output directory exists (no error if it already does).
os.makedirs(name='artifacts', exist_ok=True)

In [3]:
# Create the data sub-directory. os.path.join picks the right separator for the
# current OS; a backslash-separated literal is Windows-only and contains the
# invalid escape sequence '\d'.
os.makedirs(os.path.join('artifacts', 'data'), exist_ok=True)

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer

In [5]:
# Load the raw churn data. os.path.join keeps the path portable; a
# backslash-separated literal only resolves on Windows and triggers Python's
# invalid-escape warning for '\d' and '\C'.
df = pd.read_csv(os.path.join('artifacts', 'data', 'Churn_Modelling.csv'))

In [6]:
# Preview the first five rows of the loaded data.
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [7]:
# Count missing values per column.
df.isna().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [8]:
# Remove the RowNumber, CustomerId and Surname columns before modelling.
# Reassigning (instead of inplace=True) and errors='ignore' make this cell safe
# to re-run: a second execution is a no-op rather than a KeyError.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')

In [53]:
# Display the frame (pandas truncates the middle rows of long frames).
df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [10]:
# Column groups consumed by the preprocessing ColumnTransformer:
# categorical columns are one-hot encoded, numeric columns are standardised.
cat_cols = [
    'Geography',
    'Gender',
]
num_cols = [
    'CreditScore',
    'Age',
    'Balance',
    'EstimatedSalary',
]

In [11]:
# Feature matrix: every column except the 'Exited' target.
x = df.drop('Exited', axis=1)
x

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.00,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.80,3,1,0,113931.57
3,699,France,Female,39,1,0.00,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10
...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77
9997,709,France,Female,36,7,0.00,1,0,1,42085.58
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52


In [12]:
# Target vector: the 'Exited' column as a Series.
y = df.loc[:, 'Exited']
y

0       1
1       0
2       1
3       0
4       0
       ..
9995    0
9996    0
9997    1
9998    1
9999    0
Name: Exited, Length: 10000, dtype: int64

In [13]:
# Hold out 33% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=42,
)

In [14]:
# (rows, columns) of the train and test feature splits.
x_train.shape,x_test.shape

((6700, 10), (3300, 10))

In [15]:
# Shapes of the matching train and test target splits.
y_train.shape,y_test.shape

((6700,), (3300,))

In [16]:
# Categorical branch: a single one-hot encoding step with default settings.
cat_pipe = Pipeline(steps=[('ohe', OneHotEncoder())])

In [17]:
# Numeric branch: a single standard-scaling step with default settings.
num_pipe = Pipeline(steps=[('scaler', StandardScaler())])

In [18]:
# Route each column group through its own pipeline. Only the columns listed in
# cat_cols / num_cols appear in the transformer's output.
processor = ColumnTransformer(
    transformers=[
        ('cat', cat_pipe, cat_cols),
        ('num', num_pipe, num_cols),
    ]
)

In [19]:
# Learn the encoding/scaling parameters from the training split and apply them to it.
x_train_pro = processor.fit_transform(X=x_train)

In [20]:
# Wrap the transformed training array in a DataFrame labelled with the
# transformer's generated feature names.
x_train_trans = pd.DataFrame(
    data=x_train_pro,
    columns=processor.get_feature_names_out(),
)

In [21]:
# Peek at the first two transformed training rows.
x_train_trans.head(2)

Unnamed: 0,cat__Geography_France,cat__Geography_Germany,cat__Geography_Spain,cat__Gender_Female,cat__Gender_Male,num__CreditScore,num__Age,num__Balance,num__EstimatedSalary
0,0.0,1.0,0.0,1.0,0.0,1.675888,-0.941837,0.478618,1.504937
1,0.0,1.0,0.0,0.0,1.0,0.303207,-0.179872,-0.046891,-1.311771


In [22]:
# Apply the already-fitted preprocessing to the test split (transform only, no refit).
x_test_pro = processor.transform(X=x_test)

In [23]:
# Wrap the transformed test array in a DataFrame labelled with the
# transformer's generated feature names.
x_test_trans = pd.DataFrame(
    data=x_test_pro,
    columns=processor.get_feature_names_out(),
)

In [24]:
# Peek at the first two transformed test rows.
x_test_trans.head(2)

Unnamed: 0,cat__Geography_France,cat__Geography_Germany,cat__Geography_Spain,cat__Gender_Female,cat__Gender_Male,num__CreditScore,num__Age,num__Balance,num__EstimatedSalary
0,0.0,1.0,0.0,0.0,1.0,-0.580717,-0.6561,0.329176,-1.026163
1,1.0,0.0,0.0,0.0,1.0,-0.299941,0.391602,-1.216488,0.784718


In [25]:
def _append_passthrough(encoded: pd.DataFrame, raw: pd.DataFrame, transformed_cols: list) -> pd.DataFrame:
    """Return `encoded` with the non-transformed columns of `raw` appended on the right.

    Parameters
    ----------
    encoded : DataFrame of transformed features with a default 0..n-1 index.
    raw : the original split the transformed features were computed from.
    transformed_cols : names of the columns in `raw` that are already
        represented in `encoded` and must therefore be left out.

    `raw` is not modified: the column drop and index reset both operate on
    copies, so this cell and the earlier transform cells can be re-run in any order.
    """
    # Reset the shuffled split index so rows align positionally with `encoded`.
    passthrough = raw.drop(columns=transformed_cols).reset_index(drop=True)
    assert len(passthrough) == len(encoded), "encoded and raw frames must have the same number of rows"
    return pd.concat([encoded, passthrough], axis=1)


# Final model inputs: transformed columns followed by the remaining raw columns.
x_trained = _append_passthrough(x_train_trans, x_train, cat_cols + num_cols)
x_tested = _append_passthrough(x_test_trans, x_test, cat_cols + num_cols)

In [31]:
# Show the assembled training feature matrix.
x_trained

Unnamed: 0,cat__Geography_France,cat__Geography_Germany,cat__Geography_Spain,cat__Gender_Female,cat__Gender_Male,num__CreditScore,num__Age,num__Balance,num__EstimatedSalary,Tenure,NumOfProducts,HasCrCard,IsActiveMember
0,0.0,1.0,0.0,1.0,0.0,1.675888,-0.941837,0.478618,1.504937,5,1,0,0
1,0.0,1.0,0.0,0.0,1.0,0.303207,-0.179872,-0.046891,-1.311771,8,2,1,1
2,0.0,1.0,0.0,0.0,1.0,-0.102358,0.963076,0.712431,-1.320815,4,1,1,0
3,1.0,0.0,0.0,1.0,0.0,-0.414331,-0.751346,0.669297,-0.812483,8,1,1,1
4,1.0,0.0,0.0,0.0,1.0,-0.684708,-0.560855,-1.216488,-1.628920,9,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6695,1.0,0.0,0.0,0.0,1.0,1.207928,1.439304,-0.102296,-0.548425,8,1,1,1
6696,1.0,0.0,0.0,1.0,0.0,0.313606,1.820287,-1.216488,-1.737453,1,1,1,1
6697,1.0,0.0,0.0,1.0,0.0,0.864758,-0.084626,-1.216488,-0.152991,1,3,0,0
6698,1.0,0.0,0.0,0.0,1.0,0.157619,0.391602,1.823844,-0.061436,8,1,1,0


In [32]:
# Show the assembled test feature matrix.
x_tested

Unnamed: 0,cat__Geography_France,cat__Geography_Germany,cat__Geography_Spain,cat__Gender_Female,cat__Gender_Male,num__CreditScore,num__Age,num__Balance,num__EstimatedSalary,Tenure,NumOfProducts,HasCrCard,IsActiveMember
0,0.0,1.0,0.0,0.0,1.0,-0.580717,-0.656100,0.329176,-1.026163,3,2,0,0
1,1.0,0.0,0.0,0.0,1.0,-0.299941,0.391602,-1.216488,0.784718,1,2,1,1
2,0.0,0.0,1.0,1.0,0.0,-0.528721,0.486847,-1.216488,-0.735758,4,2,1,0
3,0.0,1.0,0.0,0.0,1.0,-1.516636,1.915532,0.687874,1.205455,8,2,1,1
4,0.0,0.0,1.0,1.0,0.0,-0.955084,-1.132329,0.781274,0.235702,7,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3295,1.0,0.0,0.0,1.0,0.0,2.060655,0.010619,0.050294,1.486478,7,1,0,1
3296,0.0,0.0,1.0,0.0,1.0,0.220014,0.105865,0.727465,-1.693266,1,1,1,1
3297,1.0,0.0,0.0,1.0,0.0,-0.653511,-0.751346,0.551753,0.816568,10,1,1,0
3298,1.0,0.0,0.0,0.0,1.0,-2.223775,2.010778,0.036408,-0.893950,7,1,0,1


## ANN Implementation

In [33]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [34]:
## Build our ANN model, one layer at a time
model = Sequential()
## HL1: receives the input features (one input per column of x_trained)
model.add(Dense(units=64, activation='relu', input_shape=(x_trained.shape[1],)))
## HL2: fed by HL1
model.add(Dense(units=32, activation='relu'))
## Output layer: a single sigmoid unit
model.add(Dense(units=1, activation='sigmoid'))

## Compile the model
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

## Summarize the model
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                896       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 3009 (11.75 KB)
Trainable params: 3009 (11.75 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [35]:
## Set up Tensorboard
# Give every run its own timestamped sub-directory so that re-running the
# training cell does not interleave event files from several runs in one folder.
# Pointing TensorBoard at `logs/` still finds the runs in these sub-directories.
log_dir = os.path.join('logs', 'fit', datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [36]:
## Set up Early Stopping
# Stop once val_loss has gone 10 epochs without improving, then restore the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [52]:
## Train the model
# NOTE(review): the held-out test split doubles as the validation set here, so
# early stopping is driven by the same data used for evaluation. Confirm this
# is intended or carve out a separate validation split.
history = model.fit(
    x=x_trained,
    y=y_train,
    validation_data=(x_tested, y_test),
    epochs=100,
    callbacks=[tensorboard_callback, early_stopping],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


In [38]:
## Load Tensorboard
# Register the TensorBoard notebook extension, then open it on the `logs/` directory.

%load_ext tensorboard
%tensorboard --logdir logs/

Launching TensorBoard...

In [39]:
# Display the training feature matrix again.
x_trained

Unnamed: 0,cat__Geography_France,cat__Geography_Germany,cat__Geography_Spain,cat__Gender_Female,cat__Gender_Male,num__CreditScore,num__Age,num__Balance,num__EstimatedSalary,Tenure,NumOfProducts,HasCrCard,IsActiveMember
0,0.0,1.0,0.0,1.0,0.0,1.675888,-0.941837,0.478618,1.504937,5,1,0,0
1,0.0,1.0,0.0,0.0,1.0,0.303207,-0.179872,-0.046891,-1.311771,8,2,1,1
2,0.0,1.0,0.0,0.0,1.0,-0.102358,0.963076,0.712431,-1.320815,4,1,1,0
3,1.0,0.0,0.0,1.0,0.0,-0.414331,-0.751346,0.669297,-0.812483,8,1,1,1
4,1.0,0.0,0.0,0.0,1.0,-0.684708,-0.560855,-1.216488,-1.628920,9,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6695,1.0,0.0,0.0,0.0,1.0,1.207928,1.439304,-0.102296,-0.548425,8,1,1,1
6696,1.0,0.0,0.0,1.0,0.0,0.313606,1.820287,-1.216488,-1.737453,1,1,1,1
6697,1.0,0.0,0.0,1.0,0.0,0.864758,-0.084626,-1.216488,-0.152991,1,3,0,0
6698,1.0,0.0,0.0,0.0,1.0,0.157619,0.391602,1.823844,-0.061436,8,1,1,0


In [41]:
# Persist the whole model to 'my_model.h5'.
model.save(filepath='my_model.h5')

  saving_api.save_model(


In [42]:
# Persist only the model's weights under the 'my_model.weights' path.
model.save_weights(filepath='my_model.weights')

In [44]:
# Restore the weights stored under 'my_model.weights' into the current model.
model.load_weights(filepath='my_model.weights')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x2874b6a9970>

In [47]:
import pickle
# Persist the fitted preprocessing transformer. The context manager guarantees
# the file handle is flushed and closed even if pickling raises.
with open('pro.pkl', 'wb') as pkl_file:
    pickle.dump(processor, pkl_file)