In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder,OneHotEncoder
import tensorflow  as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime
import numpy as np
import pickle

In [21]:
## load Data sets

data =pd.read_excel("churn_Modelling_sample.csv.xlsx")
data

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,Unnamed: 10,Exited,HasCrCard,IsActiveMember,EstimatedSalary
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,2,,1,1,0,56719.205559
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,,1,0,0,30001.354509
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,,1,0,0,101124.386213
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,,1,0,0,17897.621064
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,,0,1,1,111079.828117


In [88]:
## preprocess the data
## drop the irrelevant columns
data = data.drop(["RowNumber","CustomerId","Surname"],axis=1)

In [118]:
data

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,Exited,HasCrCard,IsActiveMember,EstimatedSalary,gender
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,2,1,1,1,77283.066428,0
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,1,112296.418793,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,0,1,0,122465.732397,0
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,1,1,1,49594.09964,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,0,0,37454.02941,0


In [89]:
## Encode categorical variables
label_encoder_gender= LabelEncoder()
data["gender"] = label_encoder_gender.fit_transform(data["Gender"])
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,Exited,HasCrCard,IsActiveMember,EstimatedSalary,gender
0,619,France,Female,42,2,0.0,2,1,1,1,139341.470503,0
1,608,Spain,Female,41,1,83807.86,1,0,0,0,143775.920856,0
2,502,France,Female,42,8,159660.8,3,0,0,0,25558.330457,0
3,699,France,Female,39,1,0.0,2,0,0,1,47985.667122,0
4,850,Spain,Female,43,2,125510.82,1,1,1,1,131371.014161,0


In [32]:
## 0=female, 1 =male

In [11]:
## onehot encoder for GEO

onehot_encoder_geo = OneHotEncoder()
geo_encoded = onehot_encoder_geo.fit_transform(data[["Geography"]])
geo_encoded.toarray()

array([[1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.]])

In [12]:
geo_encoded_df = pd.DataFrame(geo_encoded.toarray(),columns=onehot_encoder_geo.get_feature_names_out(["Geography"]) )

In [18]:
geo_encoded_df 

Unnamed: 0,Geography_France,Geography_Spain
0,1.0,0.0
1,0.0,1.0
2,1.0,0.0
3,1.0,0.0
4,0.0,1.0


In [22]:
data = pd.concat([data.drop("Geography",axis=1),geo_encoded_df],axis=1)
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,Unnamed: 10,Exited,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Spain
0,1,15634602,Hargrave,619,Female,42,2,0.0,2,,1,1,0,56719.205559,1.0,0.0
1,2,15647311,Hill,608,Female,41,1,83807.86,1,,1,0,0,30001.354509,0.0,1.0
2,3,15619304,Onio,502,Female,42,8,159660.8,3,,1,0,0,101124.386213,1.0,0.0
3,4,15701354,Boni,699,Female,39,1,0.0,2,,1,0,0,17897.621064,1.0,0.0
4,5,15737888,Mitchell,850,Female,43,2,125510.82,1,,0,1,1,111079.828117,0.0,1.0


In [93]:
## save the encoders and scaler

with open("label_encoder_gender.pkl","wb") as file:
    pickle.dump(label_encoder_gender,file)


with open("label_encoder_geo.pkl","wb") as file:
    pickle.dump(onehot_encoder_geo ,file)


In [94]:
with open("label_encoder_gender.pkl", "rb") as file:
    encoder = pickle.load(file)


In [95]:
with open("label_encoder_geo.pkl", "rb") as file:
    geo_encoder = pickle.load(file)

In [96]:
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,Exited,HasCrCard,IsActiveMember,EstimatedSalary,gender,Geography_France,Geography_Spain
0,619,Female,42,2,0.0,2,1,1,1,139341.470503,0,1.0,0.0
1,608,Female,41,1,83807.86,1,0,0,0,143775.920856,0,0.0,1.0
2,502,Female,42,8,159660.8,3,0,0,0,25558.330457,0,1.0,0.0
3,699,Female,39,1,0.0,2,0,0,1,47985.667122,0,1.0,0.0
4,850,Female,43,2,125510.82,1,1,1,1,131371.014161,0,0.0,1.0


In [23]:
# ## divide the datasets into depended and independed features
# 1. Drop target column
X = data.drop("Exited", axis=1)
y = data["Exited"]

# 2. Drop non-useful text columns
X = X.drop(['CustomerId', 'Surname', 'Name'], axis=1, errors='ignore')  # Add actual column names

# 3. Encode categorical columns
X = pd.get_dummies(X, drop_first=True)

# 4. Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Scale
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)




  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


In [98]:
X_test

array([[-0.4702382 , -0.33333333, -0.81150267,  0.17307828, -1.41421356,
        -1.        , -1.73205081,  1.15411426,  0.        , -1.73205081,
         1.73205081]])

In [99]:
with open("scaler.pkl",mode="wb") as file:
    pickle.dump(scaler,file)

In [31]:
## Build our ANN model
from tensorflow.keras.layers import Dropout

model = Sequential([
    Dense(64, activation="relu", input_shape=(X_train.shape[1],)),
    Dropout(0.3),  # Dropout after 1st hidden layer

    Dense(32, activation="relu"),
    Dropout(0.3),  # Dropout after 2nd hidden layer

    Dense(1, activation="sigmoid")
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [32]:
## configuring the model  for training process

import tensorflow
opt = tensorflow.keras.optimizers.Adam(learning_rate=0.001)
loss=tensorflow.keras.losses.BinaryCrossentropy()

model.compile(optimizer="adam",loss="binary_crossentropy",metrics=["accuracy"])


"logs/fit"	This is the base folder where TensorBoard logs will be saved

datetime.datetime.now()	Gets the current date and time

.strftime("%Y%m%d-%H%M%S")	Converts the date-time into a string like 20250724-211512 (YYYYMMDD-HHMMSS format

In [28]:
## Setup the tensor board


from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
log_dir = "logs/fit"+ datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorflow_callback = TensorBoard(log_dir = log_dir, histogram_freq = 1)

In [26]:
## Set up early Stopping
early_stopping_callback = EarlyStopping(
    monitor="val_loss",
    patience=3,
    min_delta=0.001,
    restore_best_weights=True,
     verbose=1
)

In [33]:
history = model.fit(
    X_train, 
    y_train,
    validation_data = (X_test,y_test),
    epochs=50,
     batch_size=32,
    callbacks=[tensorflow_callback,
               early_stopping_callback
               ]
    )

Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.2500 - loss: 0.6931 - val_accuracy: 1.0000 - val_loss: 0.6926
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step - accuracy: 0.7500 - loss: 0.6929 - val_accuracy: 1.0000 - val_loss: 0.6921
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step - accuracy: 0.7500 - loss: 0.6926 - val_accuracy: 1.0000 - val_loss: 0.6916
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 142ms/step - accuracy: 0.7500 - loss: 0.6924 - val_accuracy: 1.0000 - val_loss: 0.6911
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step - accuracy: 0.7500 - loss: 0.6921 - val_accuracy: 1.0000 - val_loss: 0.6907
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 139ms/step - accuracy: 0.7500 - loss: 0.6919 - val_accuracy: 1.0000 - val_loss: 0.6902
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━

In [39]:
model.save('model.h5')



In [38]:
## load tensorboard extension
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [40]:
%tensorboard --logdir logs/fit20250726-183031

Reusing TensorBoard on port 6007 (pid 13132), started 0:01:28 ago. (Use '!kill 13132' to kill it.)