In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder,OrdinalEncoder
import pickle

In [None]:
# Read the churn dataset from the CSV file and preview its first five rows.
data = pd.read_csv("Churn_Modelling.csv")
data.head(n=5)

In [3]:
# Drop the columns that are not used to train the model.
# (`columns=` already selects the axis, so no separate `axis` argument is needed.)
data = data.drop(columns=["RowNumber", "CustomerId", "Surname"])

In [4]:
from sklearn.compose import ColumnTransformer

# Encoders applied to individual columns:
#   - "Geography" is one-hot encoded with the first category dropped,
#   - "Gender" is ordinal-encoded.
ohe_geography = OneHotEncoder(drop="first")
oe_gender = OrdinalEncoder()

# (step name, encoder, columns) triples. The step names "OHE" and "le" end up as
# the "<step>__" prefixes of the names returned by get_feature_names_out().
column_encoders = [
    ("OHE", ohe_geography, ["Geography"]),
    ("le", oe_gender, ["Gender"]),
]

# Columns that are not listed above are passed through unchanged.
transf = ColumnTransformer(transformers=column_encoders, remainder="passthrough")

In [5]:
# Independent features only. `data` itself is deliberately NOT rebound here:
# the split cell further down reads the target from data['Exited'], and
# overwriting `data` without that column made that lookup raise KeyError
# (it also made this cell fail when run a second time).
feature_frame = data.drop(columns="Exited")

# Fit the encoders on the feature columns only, so the fitted transformer
# does not expect the target column in the frames it is given later.
data_transformed = transf.fit_transform(feature_frame)
features = transf.get_feature_names_out()

# To get the proper names of the features: get_feature_names_out() returns
# names of the form "<step>__<column>", so keep only the part after the prefix.
features_cleaned = [name.split("__")[-1] for name in features]
X = pd.DataFrame(data_transformed, columns=features_cleaned, index=feature_frame.index)

# Keep the target next to the encoded features: the split cell separates it
# again with X.drop('Exited', axis=1), which raised KeyError when X was built
# without this column.
X["Exited"] = data["Exited"]


In [6]:
# Save the ColumnTransformer as a pickle file.
# Pickling stores a Python object (such as a model or a transformer) in a
# serialized form, so it can be loaded and reused later without being
# redefined or refitted.
with open('columnTransform.pkl', 'wb') as transformer_file:
    pickle.dump(transf, transformer_file)


In [None]:
## Divide the dataset into independent (X) and dependent (y) features
X = X.drop(columns=['Exited'])
y = data['Exited']

## Split the data into training and testing sets
## (20% of the rows are held out; random_state is fixed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Scale the features: the scaler is fitted on the training rows only and the
## same fitted scaler is then applied to the test rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [28]:
# Save the fitted scaler as a pickle file so it can be loaded and reused later on.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

IMPLEMENTATION OF ANN

In [29]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [None]:
# Display the dimensions of the scaled training array. Its second entry
# (X_train.shape[1]) is what the first Dense layer of the model uses as its
# input size.
X_train.shape

In [31]:
## Build the ANN model, one layer at a time
model = Sequential()

## Hidden layer 1: 64 ReLU units, connected to the input (one input per column of X_train)
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))

## Hidden layer 2: 32 ReLU units
model.add(Dense(32, activation='relu'))

## Output layer: a single sigmoid unit, because the target is binary
model.add(Dense(1, activation='sigmoid'))

In [32]:
import tensorflow

# Optimizer used for the backward propagation step (Adam, learning rate 0.01).
opt = tensorflow.keras.optimizers.Adam(
    learning_rate=0.01,
)

# Loss function the model is trained against (binary cross-entropy).
loss = tensorflow.keras.losses.BinaryCrossentropy()


In [33]:
# Compile the model with the optimizer and loss defined earlier, tracking
# accuracy as the reported metric.
model.compile(
    optimizer=opt,
    loss=loss,
    metrics=['accuracy'],
)

In [34]:
## Set up the TensorBoard callback
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard

# Every run writes its logs into its own folder under logs/fit/, named after
# the time at which this cell was executed.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp

tensorflow_callback = TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)

In [35]:
# Set up early stopping: training is halted once the monitored metric (the
# validation loss) has stopped improving. `patience` is the number of epochs to
# keep waiting for an improvement before stopping, and restore_best_weights
# puts the best weights seen back on the model when training stops early.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)


In [None]:
# Train the model for at most 100 epochs. The validation data is evaluated
# after every epoch, and two callbacks run during training: one writes the
# TensorBoard logs, the other applies early stopping.
# NOTE(review): the held-out test split is also used as the validation data
# here, so early stopping picks its weights using the test rows - consider a
# separate validation split.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    callbacks=[tensorflow_callback, early_stopping_callback],
)

In [None]:
# Save the trained model to 'model.h5' for future use.
model.save(filepath='model.h5')

In [None]:
## Load the TensorBoard notebook extension, used for visualization
%load_ext tensorboard

In [None]:
# Start TensorBoard on logs/fit, the parent directory of the timestamped log
# folder that the TensorBoard callback writes to.
%tensorboard --logdir logs/fit