In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime
import pickle

In [2]:
# Load the churn dataset and discard the identifier columns
# (row number, customer id, surname) in a single chained expression.
data = pd.read_csv('Churn_Modelling.csv').drop(
    columns=['RowNumber', 'CustomerId', 'Surname']
)

In [3]:
# Integer-encode the 'Gender' column. The fitted encoder stays in `leg` so it
# can be pickled by a later cell.
leg = LabelEncoder()
data["Gender"] = leg.fit_transform(data['Gender'])

# One-hot encode 'Geography'. fit_transform() returns a sparse matrix, hence
# the explicit .toarray() to obtain a dense NumPy array.
onehot_encoder_geo = OneHotEncoder()
geo_encoder = onehot_encoder_geo.fit_transform(data[['Geography']]).toarray()
print(geo_encoder)

# Wrap the encoded array in a DataFrame with the encoder-generated column
# names. The frame is built on `data`'s own index so that a column-wise
# concat with `data` lines up row-for-row even if `data` does not carry a
# default RangeIndex.
geo_encoded_df = pd.DataFrame(
    geo_encoder,
    columns=onehot_encoder_geo.get_feature_names_out(['Geography']),
    index=data.index,
)

[[1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 ...
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]]


In [4]:
# Swap the raw 'Geography' column for its one-hot encoded columns.
data_without_geo = data.drop(columns=['Geography'])
data = pd.concat([data_without_geo, geo_encoded_df], axis=1)

In [5]:
# Persist both fitted encoders to disk, one pickle file per encoder,
# in the same order as before (gender first, then geography).
for pickle_path, fitted_encoder in (
    ('label_encoder_gender.pkl', leg),
    ('onehot_encoder_geo.pkl', onehot_encoder_geo),
):
    with open(pickle_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)


In [6]:
# Separate the target column 'Exited' from the feature columns.
y = data['Exited']
x = data.drop(columns=['Exited'])

In [7]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training split only, then apply the same fitted
# transformation to both splits.
scalar = StandardScaler()
scalar.fit(x_train)
x_train = scalar.transform(x_train)
x_test = scalar.transform(x_test)

In [8]:
# Persist the fitted scaler to 'scalar.pkl'.
with open('scalar.pkl', mode='wb') as scaler_file:
    pickle.dump(scalar, scaler_file)

In [9]:
# Feed-forward classifier: two ReLU hidden layers followed by a single
# sigmoid output unit.
# The input shape is declared with an explicit Input object as the first
# entry instead of `input_shape=` on the first Dense layer; Keras warns
# against passing `input_shape` to a layer inside a Sequential model.
model = Sequential([
    tf.keras.Input(shape=(x_train.shape[1],)),  # One input per column of x_train
    Dense(64, activation='relu'),  # First hidden layer
    Dense(32, activation='relu'),  # Second hidden layer
    Dense(1, activation='sigmoid'),  # Output layer
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# Print the summary of the model built in the previous cell.
model.summary()

In [11]:
# Adam optimizer with an explicit learning rate of 0.01.
opt = tf.keras.optimizers.Adam(learning_rate=1e-2)

# Binary cross-entropy loss object, created with its default settings.
loss = tf.keras.losses.BinaryCrossentropy()

In [12]:
# Compile with the optimizer and loss objects created in the previous cell.
# Passing the `loss` object (rather than repeating the loss as a string)
# means any configuration change made where `loss` is constructed actually
# takes effect here. Accuracy is tracked as the reporting metric.
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

In [23]:
# Give each run its own log directory by appending a timestamp to the prefix.
# NOTE(review): there is no '/' between "logs/fit" and the timestamp, so run
# directories are created directly under logs/ as "fit<timestamp>" -- confirm
# this layout is intended before changing it.
run_started_at = datetime.datetime.now()
log_dir = f"logs/fit{run_started_at:%Y%m%d-%H%M%S}"

# histogram_freq=1 asks the callback to record histograms every epoch.
tensorflow_callback = TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)

In [24]:
# Stop training once validation loss has failed to improve for 10 epochs,
# and roll the model back to the best weights seen.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [25]:
# Train for at most 100 epochs; the early-stopping callback may end the run
# sooner, and the TensorBoard callback logs each epoch.
# NOTE(review): the held-out test split is also used as validation data, so
# early stopping is driven by the test set.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=100,
    validation_data=(x_test, y_test),
    callbacks=[tensorflow_callback, early_stopping_callback],
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8575 - loss: 0.3447 - val_accuracy: 0.8605 - val_loss: 0.3370
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8658 - loss: 0.3304 - val_accuracy: 0.8565 - val_loss: 0.3463
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8698 - loss: 0.3232 - val_accuracy: 0.8590 - val_loss: 0.3435
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.8636 - loss: 0.3247 - val_accuracy: 0.8580 - val_loss: 0.3405
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8707 - loss: 0.3106 - val_accuracy: 0.8560 - val_loss: 0.3504
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8628 - loss: 0.3348 - val_accuracy: 0.8570 - val_loss: 0.3468
Epoch 7/100
[1m250/25

In [26]:
# Save the trained model to 'model.h5' in the working directory.
model.save('model.h5')



In [27]:
# Load the TensorBoard notebook extension. Re-running this cell in the same
# kernel only prints an "already loaded" notice.
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [30]:
tensorboard --logdir logs/fit20250102-125653

Reusing TensorBoard on port 6007 (pid 5836), started 0:00:32 ago. (Use '!kill 5836' to kill it.)