In [42]:
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder,OneHotEncoder
import numpy as np
import pandas as pd

In [43]:
df=pd.read_csv('Churn_Modelling.csv')
df.head(5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


# Prepossing The Data

In [44]:
# Drop The Columns Which Are Unique For All Users Like RowNumbeR, CustomerId, Surname :
df=df.drop(['RowNumber','CustomerId','Surname'],axis=1)
df.head(5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [45]:
# Convert Categorical Columns Like Geography & Gender Into Numeric :
le_gender=LabelEncoder() # for Gender Column, Since It Has Only 2 Categories
df['Gender']=le_gender.fit_transform(df['Gender'])

# For Geography Column, Since It Has 3 Categories, We Will Use OneHotEncoding :
one_hot_encoder_geo=OneHotEncoder()
geo_encoder=one_hot_encoder_geo.fit_transform(df['Geography'].values.reshape(-1,1))
geo_encoder=pd.DataFrame(geo_encoder.toarray(),columns=['France','Germany','Spain'])
df=pd.concat([df,geo_encoder],axis=1)
df=df.drop(['Geography'],axis=1) # Drop The Original Geography Column
df.head(5)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,France,Germany,Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [46]:
# Save The Encoded Data In Pickle File :
with open('lable_Encoding_gender.pkl','wb') as file:
    pickle.dump(le_gender,file)
    
with open('One_Hot_Encoding_Geography.pkl','wb') as file:
    pickle.dump(geo_encoder,file)    


In [47]:
# Split The Data Into Train & Test :
X=df.drop(['EstimatedSalary'],axis=1)
y=df['EstimatedSalary']

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=50)
X_train.head(5)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,Exited,France,Germany,Spain
5189,826,1,41,5,146466.46,2,0,0,0,0.0,0.0,1.0
7969,637,1,49,2,108204.52,1,1,0,1,0.0,1.0,0.0
9039,545,0,44,1,0.0,2,1,1,0,0.0,0.0,1.0
5208,779,0,38,7,0.0,2,1,1,0,0.0,0.0,1.0
506,691,1,30,7,116927.89,1,1,0,0,0.0,1.0,0.0


In [48]:
# Standardization (or z-score normalization) The Features Data :
sc=StandardScaler()
X_train=sc.fit_transform(X_train)
X_test=sc.transform(X_test)

# Convert the standardized data back to a pandas DataFrame Just For See The Data :
x=pd.DataFrame(X_train,columns=X.columns)
x

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,Exited,France,Germany,Spain
0,1.824583,0.903490,0.219906,-0.002572,1.129181,0.809267,-1.540615,-1.029875,-0.508468,-1.006018,-0.576470,1.743341
1,-0.122525,0.903490,0.989046,-1.041284,0.516851,-0.913102,0.649092,-1.029875,1.966691,-1.006018,1.734694,-0.573611
2,-1.070324,-1.106819,0.508333,-1.387521,-1.214815,0.809267,0.649092,0.970992,-0.508468,-1.006018,-0.576470,1.743341
3,1.340382,-1.106819,-0.068522,0.689902,-1.214815,0.809267,0.649092,0.970992,-0.508468,-1.006018,-0.576470,1.743341
4,0.433792,0.903490,-0.837663,0.689902,0.656456,-0.913102,0.649092,-1.029875,-0.508468,-1.006018,1.734694,-0.573611
...,...,...,...,...,...,...,...,...,...,...,...,...
6995,0.907691,-1.106819,-0.549235,-0.348809,-1.214815,0.809267,0.649092,-1.029875,-0.508468,0.994018,-0.576470,-0.573611
6996,-1.966612,0.903490,-0.356950,1.382377,1.134207,0.809267,0.649092,0.970992,-0.508468,-1.006018,1.734694,-0.573611
6997,0.052612,0.903490,1.565902,-0.695047,0.185274,-0.913102,0.649092,0.970992,-0.508468,0.994018,-0.576470,-0.573611
6998,-2.615648,-1.106819,-0.453093,-0.002572,0.481756,-0.913102,0.649092,0.970992,1.966691,-1.006018,1.734694,-0.573611


In [49]:
# Save The StandardScaler Object In Pickle File :
with open('Feature_scaling.pkl','wb') as file:
    pickle.dump(sc,file)

# ANN Implementation


In [50]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.activations import relu
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

from tensorflow.keras.initializers import he_uniform # For Weight Initialization

from tensorflow.keras.layers import BatchNormalization 
from tensorflow.keras.regularizers import l2 # For Regularization
from tensorflow.keras.layers import Dropout # For Regularization

import datetime

In [51]:
from tensorflow.keras.layers import BatchNormalization, Dropout

# Model Building :

model=Sequential([
    Dense(512,
          activation='relu',
          kernel_initializer=he_uniform(),
          kernel_regularizer=l2(0.00015342300377368197),
          input_dim=X_train.shape[1]),
    BatchNormalization(),
    Dropout(0.24011218896979006),
    
    Dense(1)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [57]:
# Model Summary :
model.summary()

In [58]:
# Compile The Model :
model.compile(optimizer=RMSprop(learning_rate=0.008973549805486144), loss='mean_squared_error', metrics=['mse'])

In [59]:
# Set Up TensorBoard :
log_dir='logs/fit_Regression'+datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
tensorboard_callback=TensorBoard(log_dir=log_dir,histogram_freq=1)

In [60]:
# Set Up Early Stopping :
early_stop=EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True)

In [61]:
# Train The Model :
model.fit(X_train,y_train,
          validation_data=(X_test,y_test),
          epochs=20,batch_size=32,
          callbacks=[tensorboard_callback,early_stop])

Epoch 1/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - loss: 13429215232.0000 - mse: 13429215232.0000 - val_loss: 12851752960.0000 - val_mse: 12851752960.0000
Epoch 2/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 12603457536.0000 - mse: 12603457536.0000 - val_loss: 11633018880.0000 - val_mse: 11633018880.0000
Epoch 3/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 11255797760.0000 - mse: 11255797760.0000 - val_loss: 9872754688.0000 - val_mse: 9872754688.0000
Epoch 4/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 9448809472.0000 - mse: 9448809472.0000 - val_loss: 7789811712.0000 - val_mse: 7789811712.0000
Epoch 5/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 7359943680.0000 - mse: 7359943680.0000 - val_loss: 5739863552.0000 - val_mse: 5739863552.0000
Epoch 6/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1e0e6665c10>

In [62]:
# Save The Model :
model.save('Updated_ANN_regression_model.h5')



In [63]:
# Lode The TensorBoard :
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [65]:
%tensorboard --logdir logs/fit_Regression20250124-235154

In [27]:
# Model Evaluation :
test_loss, test_mae=model.evaluate(X_test,y_test)
print('Test Data Mean Absolute Error :',test_loss)

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 49687.5664 - mae: 49687.5664
Test Data Mean Absolute Error : 50176.9296875
