In [1]:
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder,OneHotEncoder
import numpy as np
import pandas as pd

In [2]:
df=pd.read_csv('Churn_Modelling.csv')
df.head(5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


# Prepossing The Data

In [3]:
# Drop The Columns Which Are Unique For All Users Like RowNumbeR, CustomerId, Surname :
df=df.drop(['RowNumber','CustomerId','Surname'],axis=1)
df.head(5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# Convert Categorical Columns Like Geography & Gender Into Numeric :
le_gender=LabelEncoder() # for Gender Column, Since It Has Only 2 Categories
df['Gender']=le_gender.fit_transform(df['Gender'])

# For Geography Column, Since It Has 3 Categories, We Will Use OneHotEncoding :
one_hot_encoder_geo=OneHotEncoder()
geo_encoder=one_hot_encoder_geo.fit_transform(df['Geography'].values.reshape(-1,1))
geo_encoder=pd.DataFrame(geo_encoder.toarray(),columns=['France','Germany','Spain'])
df=pd.concat([df,geo_encoder],axis=1)
df=df.drop(['Geography'],axis=1) # Drop The Original Geography Column
df.head(5)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,France,Germany,Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [5]:
# Save The Encoded Data In Pickle File :
with open('lable_Encoding_gender.pkl','wb') as file:
    pickle.dump(le_gender,file)
    
with open('One_Hot_Encoding_Geography.pkl','wb') as file:
    pickle.dump(geo_encoder,file)    


In [6]:
# Split The Data Into Train & Test :
X=df.drop(['Exited'],axis=1)
y=df['Exited']

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=50)
X_train.head(5)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,France,Germany,Spain
5189,826,1,41,5,146466.46,2,0,0,180934.67,0.0,0.0,1.0
7969,637,1,49,2,108204.52,1,1,0,169037.84,0.0,1.0,0.0
9039,545,0,44,1,0.0,2,1,1,82614.89,0.0,0.0,1.0
5208,779,0,38,7,0.0,2,1,1,138542.87,0.0,0.0,1.0
506,691,1,30,7,116927.89,1,1,0,21198.39,0.0,1.0,0.0


In [7]:
# Standardization (or z-score normalization) The Features Data :
sc=StandardScaler()
X_train=sc.fit_transform(X_train)
X_test=sc.transform(X_test)

# Convert the standardized data back to a pandas DataFrame Just For See The Data :
x=pd.DataFrame(X_train,columns=X.columns)
x

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,France,Germany,Spain
0,1.824583,0.903490,0.219906,-0.002572,1.129181,0.809267,-1.540615,-1.029875,1.405186,-1.006018,-0.576470,1.743341
1,-0.122525,0.903490,0.989046,-1.041284,0.516851,-0.913102,0.649092,-1.029875,1.198486,-1.006018,1.734694,-0.573611
2,-1.070324,-1.106819,0.508333,-1.387521,-1.214815,0.809267,0.649092,0.970992,-0.303060,-1.006018,-0.576470,1.743341
3,1.340382,-1.106819,-0.068522,0.689902,-1.214815,0.809267,0.649092,0.970992,0.668654,-1.006018,-0.576470,1.743341
4,0.433792,0.903490,-0.837663,0.689902,0.656456,-0.913102,0.649092,-1.029875,-1.370135,-1.006018,1.734694,-0.573611
...,...,...,...,...,...,...,...,...,...,...,...,...
6995,0.907691,-1.106819,-0.549235,-0.348809,-1.214815,0.809267,0.649092,-1.029875,0.261615,0.994018,-0.576470,-0.573611
6996,-1.966612,0.903490,-0.356950,1.382377,1.134207,0.809267,0.649092,0.970992,-1.678044,-1.006018,1.734694,-0.573611
6997,0.052612,0.903490,1.565902,-0.695047,0.185274,-0.913102,0.649092,0.970992,-1.681125,0.994018,-0.576470,-0.573611
6998,-2.615648,-1.106819,-0.453093,-0.002572,0.481756,-0.913102,0.649092,0.970992,-1.436536,-1.006018,1.734694,-0.573611


In [8]:
# Save The StandardScaler Object In Pickle File :
with open('Feature_scaling.pkl','wb') as file:
    pickle.dump(sc,file)

# ANN Implementation


In [12]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.activations import relu
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

from tensorflow.keras.initializers import he_uniform # For Weight Initialization

from tensorflow.keras.layers import BatchNormalization 
from tensorflow.keras.regularizers import l2 # For Regularization
from tensorflow.keras.layers import Dropout # For Regularization

import datetime

In [13]:
# Model Building :
model=Sequential([
    Dense(128,
          activation='relu',
          kernel_initializer=he_uniform(),
          kernel_regularizer=l2(0.00018542078372695),
          input_dim=X_train.shape[1]),
    BatchNormalization(),
    Dropout(0.38247415095870707),
    
    Dense(1,activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
# Model Summary :
model.summary()

In [15]:
# Compile The Model :
model.compile(optimizer=RMSprop(learning_rate=0.0009715619067182132), loss='binary_crossentropy', metrics=['accuracy'])

In [16]:
# Set Up TensorBoard :
log_dir='logs/fit_Classification'+datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
tensorboard_callback=TensorBoard(log_dir=log_dir,histogram_freq=1)

In [17]:
# Set Up Early Stopping :
early_stop=EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True)

In [18]:
# Train The Model :
history = model.fit(X_train,y_train,
                   validation_data=(X_test,y_test),
                   epochs=40,  # Increased epochs, early stopping will prevent overfitting
                   batch_size=32,
                   verbose=1,
                   callbacks=[tensorboard_callback,early_stop])

Epoch 1/40
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.6687 - loss: 0.6965 - val_accuracy: 0.8217 - val_loss: 0.4685
Epoch 2/40
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7910 - loss: 0.5175 - val_accuracy: 0.8383 - val_loss: 0.4464
Epoch 3/40
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8062 - loss: 0.4937 - val_accuracy: 0.8400 - val_loss: 0.4366
Epoch 4/40
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8208 - loss: 0.4569 - val_accuracy: 0.8413 - val_loss: 0.4295
Epoch 5/40
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8227 - loss: 0.4522 - val_accuracy: 0.8497 - val_loss: 0.4188
Epoch 6/40
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8313 - loss: 0.4386 - val_accuracy: 0.8480 - val_loss: 0.4159
Epoch 7/40
[1m219/219[0m 

In [19]:
# Save The Model :
model.save('Updated_ANN_classification_model.h5')



In [20]:
# Lode The TensorBoard :
%load_ext tensorboard

In [21]:
%tensorboard --logdir logs/fit_Classification20250125-135713

In [27]:
# Model Evaluation :
test_loss, test_mae=model.evaluate(X_test,y_test)
print('Test Data Mean Absolute Error :',test_loss)

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 49687.5664 - mae: 49687.5664
Test Data Mean Absolute Error : 50176.9296875
