IMPORTING ALL THE NEEDED LIBRARIES:

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
import pickle

LOADING THE DATASET:

In [3]:
# Read the churn CSV from the working directory and preview its first five rows.
data = pd.read_csv(filepath_or_buffer="Churn_Modelling.csv")
data.head(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


PREPROCESSING DATA:

In [4]:
# Preprocessing: remove the columns this notebook treats as irrelevant
# (row counter, customer id and surname) before encoding.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])


In [5]:
# Show the frame after the RowNumber, CustomerId and Surname columns were dropped.
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


ENCODING-USING LABEL AND ONE HOT ENCODING:

GENDER ENCODING:

In [6]:
# Fit a LabelEncoder on the Gender column, keep the fitted encoder for later
# reuse, and overwrite the column with its integer codes.
label_encoder_gender = LabelEncoder().fit(data['Gender'])
data['Gender'] = label_encoder_gender.transform(data['Gender'])

In [7]:
# Show the frame after Gender was replaced by its label-encoded integers.
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.80,3,1,0,113931.57,1
3,699,France,0,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,1,39,5,0.00,2,1,0,96270.64,0
9996,516,France,1,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,0,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,1,42,3,75075.31,2,1,0,92888.52,1


GEOGRAPHY ENCODING

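We use double brackets (`data[['Geography']]`) so that a 2-D DataFrame, rather than a 1-D Series, is passed to the encoder, which expects 2-D input; the 3 categories then give an encoded output with 3 columns.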

In [8]:
# Fit the one-hot encoder on the Geography column (selected as a 2-D frame),
# then encode that same column; the fitted encoder is kept for later reuse.
onehotencoding_geo = OneHotEncoder().fit(data[['Geography']])
geo_encoder = onehotencoding_geo.transform(data[['Geography']])

In [9]:
# Inspect the encoder output; its repr reports the matrix type, dtype and shape.
geo_encoder

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10000 stored elements and shape (10000, 3)>

In [10]:
# Convert the sparse result to a dense array to view the one-hot rows.
geo_encoder.toarray()

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [11]:
# List the output column names the fitted encoder generates for Geography
# (one name per category, prefixed with 'Geography').
onehotencoding_geo.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [12]:
# Wrap the dense one-hot array in a DataFrame whose columns are the encoder's
# generated feature names. Reuse data's index: the later column-wise pd.concat
# aligns on index labels, so a fresh RangeIndex here would misalign rows or
# introduce NaNs if `data` ever carries a non-default index (e.g. after
# filtering rows). With the current default index the result is unchanged.
geoencoded_df = pd.DataFrame(
    geo_encoder.toarray(),
    columns=onehotencoding_geo.get_feature_names_out(['Geography']),
    index=data.index,
)


In [13]:
# Preview the one-hot frame built from the Geography encoding.
geoencoded_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [14]:
# Replace the raw Geography column with its one-hot columns by placing the
# remaining original columns and the encoded frame side by side.
data = pd.concat(
    [
        data.drop(columns='Geography'),
        geoencoded_df,
    ],
    axis=1,
)

In [15]:
# Show the combined frame: original columns without Geography, plus the one-hot columns.
data

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0.0,1.0,0.0


SAVING INTO A PICKLE FILE:

In [16]:
# Persist both fitted encoders with pickle so they can be reloaded later:
# the gender label encoder first, then the geography one-hot encoder.
for pkl_path, fitted_encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoding_geo.pkl', onehotencoding_geo),
):
    with open(pkl_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)



PREPARING DATA FOR TRAINING:

In [17]:
# Preview the encoded frame before separating features from the target.
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [18]:
# Separate the target (Exited) from the feature columns (everything else).
y = data['Exited']
X = data.drop(columns=['Exited'])

In [19]:
# Inspect the feature frame (every column except Exited).
X

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,0.0,1.0,0.0


In [20]:
# Inspect the target Series (the Exited column).
y

0       1
1       0
2       1
3       0
4       0
       ..
9995    0
9996    0
9997    1
9998    1
9999    0
Name: Exited, Length: 10000, dtype: int64

In [21]:
# Hold out 25% of the rows as a test set; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified on y — confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)


Scaling:

In [22]:
# Standardize the features: learn the scaling statistics from the training
# split only, then apply that same fitted scaler to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [23]:
# Inspect the scaled training features returned by the scaler.
X_train

array([[ 0.21835119,  0.91186722,  1.91661905, ...,  1.00053348,
        -0.57776083, -0.57735027],
       [ 2.05728037,  0.91186722,  0.20210899, ..., -0.99946681,
         1.73082   , -0.57735027],
       [ 0.75860157, -1.09665089, -0.75039661, ...,  1.00053348,
        -0.57776083, -0.57735027],
       ...,
       [ 0.86249588, -1.09665089, -0.08364269, ...,  1.00053348,
        -0.57776083, -0.57735027],
       [ 0.15601461,  0.91186722,  0.3926101 , ...,  1.00053348,
        -0.57776083, -0.57735027],
       [ 0.46769752,  0.91186722,  1.15461458, ..., -0.99946681,
         1.73082   , -0.57735027]])

In [24]:
# Inspect the test features after applying the scaler fitted on the training split.
X_test

array([[-0.58163494,  0.91186722, -0.65514605, ..., -0.99946681,
         1.73082   , -0.57735027],
       [-0.30112032,  0.91186722,  0.3926101 , ...,  1.00053348,
        -0.57776083, -0.57735027],
       [-0.52968779, -1.09665089,  0.48786066, ..., -0.99946681,
        -0.57776083,  1.73205081],
       ...,
       [ 1.20534708,  0.91186722, -1.32189996, ...,  1.00053348,
        -0.57776083, -0.57735027],
       [ 0.39497151, -1.09665089, -0.27414381, ...,  1.00053348,
        -0.57776083, -0.57735027],
       [-0.4985195 , -1.09665089, -1.32189996, ..., -0.99946681,
         1.73082   , -0.57735027]])

SAVING SCALER AS PICKLE- FUTURE USE:

In [25]:
# Persist the fitted scaler so the same scaling can be reapplied later.
with open('scaler.pkl', mode='wb') as file:
    pickle.dump(obj=scaler, file=file)

CREATING THE MODEL:

IMPORTING REQUIRED LIBRARIES:

In [46]:
import tensorflow as tf

#for sequential models:- for a sequence of layers
from tensorflow.keras.models import Sequential
#Dense is a fully connected layer of neurons (nodes)
from tensorflow.keras.layers import Dense
#TensorBoard logs training for visualization; EarlyStopping halts training once the monitored metric stops improving
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
#used to timestamp the log directory name so each run can be told apart in TensorBoard.
import datetime

1. CREATING A SEQUENTIAL MODEL:

In [27]:
# Shape of the training array as (rows, features).
X_train.shape

(7500, 12)

In [29]:
# The second shape entry is the number of columns, i.e. the number of input features.
X_train.shape[1]

12

In [37]:
# One-element tuple form of the feature count, as passed to input_shape in the model cell.
(X_train.shape[1],)

(12,)

In [38]:
# Build the network: two ReLU hidden layers (64 and 32 units) followed by a
# single sigmoid unit; the input width is the training array's column count.
model = Sequential(
    [
        Dense(units=64, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(units=32, activation='relu'),
        Dense(units=1, activation='sigmoid'),  # output layer
    ]
)

In [39]:
# Bare repr of the model object (shows its type and memory address only).
model

<keras.src.engine.sequential.Sequential at 0x13aa69150>

In [40]:
# Print the layer-by-layer summary with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                832       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 2945 (11.50 KB)
Trainable params: 2945 (11.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


COMPILING MODEL - SETTING THE OPTIMIZER, LOSS AND METRICS USED FOR FORWARD AND BACKWARD PROPAGATION:

In [42]:
# Configure training: binary cross-entropy as the loss, Adam with an explicit
# learning rate as the optimizer, and accuracy as the reported metric.
lossused = tf.keras.losses.BinaryCrossentropy()
opt = tf.keras.optimizers.Adam(learning_rate=0.012)
model.compile(
    loss=lossused,
    optimizer=opt,
    metrics=['accuracy'],
)


TENSORBOARD SETUP:

In [47]:
# TensorBoard setup: build a log directory under logs/fit/ named after the
# current timestamp, so this run's logs are kept apart from other runs.
run_started = datetime.datetime.now()
log_dir = "logs/fit/" + run_started.strftime("%Y%m%d-%H%M%S")

In [48]:
# Callback that writes training logs to log_dir, with histograms computed every epoch.
tensorflow_callback = TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)

EARLYSTOPPING SETUP:

In [49]:
# Callback that watches val_loss, stops after 5 epochs without improvement,
# and restores the weights from the best epoch.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)


TRAINING MODEL:

In [52]:
# Train for at most 100 epochs with both callbacks attached; `history` holds
# the object returned by fit (the per-epoch training record).
# NOTE(review): the test split is also used as the validation set here, so
# early stopping and the restored best weights are selected against test data —
# consider a separate validation split if X_test is later used to report performance.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    callbacks=[tensorflow_callback, early_stopping_callback],
)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


In [None]:
#Training stopped after epoch 13 of 100: early stopping (patience=5) ended the run once val_loss stopped improving.


SAVING THE TRAINED MODEL:

In [53]:
# Save the trained model to disk as an HDF5 (.h5) file.
# NOTE(review): the cell output shows a warning raised from saving_api.save_model —
# presumably the legacy-HDF5 notice; confirm, and consider the native Keras format
# if downstream loaders allow it.
model.save('model.h5')

  saving_api.save_model(


This way the trained model is saved to disk and can be reloaded later, for example to continue training when new data is added or to apply an update.
So there is no need to retrain from scratch even if the kernel stops.

LOADING TENSORBOARD:

In [55]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available.
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [57]:
# Launch TensorBoard inline, pointed at the parent directory that holds the timestamped run logs.
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6007 (pid 20954), started 0:00:50 ago. (Use '!kill 20954' to kill it.)