In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

In [None]:
# Read the raw churn dataset from disk and preview its first rows
data = pd.read_csv("Churn_Modelling.csv")
data.head()

In [None]:
## Preprocess the data
## Drop irrelevant features
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])
data

In [None]:
## Encode categorical variables
## Fit a label encoder on 'Gender', then replace the column with its integer codes
label_encoder_gender = LabelEncoder().fit(data['Gender'])
data['Gender'] = label_encoder_gender.transform(data['Gender'])
data

In [None]:
## One-hot encode 'Geography'
from sklearn.preprocessing import OneHotEncoder

# Fit on the single-column frame, then transform it into the encoded matrix
onehot_encoder_geo = OneHotEncoder().fit(data[['Geography']])
geo_encoder = onehot_encoder_geo.transform(data[['Geography']])
geo_encoder

In [None]:
# Show the output column names the fitted encoder produces for 'Geography'
onehot_encoder_geo.get_feature_names_out(['Geography'])

In [None]:
# Convert the encoded matrix to a dense array and label its columns
# with the encoder's generated feature names
geo_encoded_df = pd.DataFrame(
    geo_encoder.toarray(),
    columns=onehot_encoder_geo.get_feature_names_out(['Geography']),
)

In [None]:
# Inspect the one-hot encoded Geography frame
geo_encoded_df

In [None]:
# Replace the raw 'Geography' column with its one-hot encoded columns
data = pd.concat(
    [data.drop(columns=['Geography']), geo_encoded_df],
    axis=1,
)
data.head()

In [None]:
# Pickle the two fitted encoders to disk; the scaler is pickled in a
# separate cell after it has been fitted
for pkl_name, fitted_encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_geo.pkl', onehot_encoder_geo),
):
    with open(pkl_name, 'wb') as file:
        pickle.dump(fitted_encoder, file)

In [None]:
# Separate the feature matrix from the 'Exited' target column
X = data.drop(columns="Exited")
y = data["Exited"]

# Hold out 20% of the rows for testing (fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Inspect the scaled training features
X_train

In [None]:
# Pickle the fitted scaler to disk
with open('scaler.pkl', mode='wb') as file:
    pickle.dump(scaler, file)

### ANN Implementation

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime

In [None]:
# Number of feature columns in the scaled training matrix
X_train.shape[1]

In [None]:
## Build an ANN Model
# Declare the input with an explicit Input object instead of passing
# `input_shape=` to the first Dense layer; Keras 3 emits a warning for the
# latter on Sequential models. The resulting architecture is unchanged.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),  # one input per feature column
    Dense(64, activation='relu'),    # HL1, connected to the input layer
    Dense(32, activation='relu'),    # HL2
    Dense(1, activation='sigmoid'),  # output layer
])

In [None]:
# Print a summary of the model that was just built
model.summary()

In [None]:
# Adam with a 0.01 step size. The previous value of 0.1 is 100x Adam's
# default (0.001) and tends to make training unstable.
opt = tf.keras.optimizers.Adam(learning_rate=0.01)

# Instantiate the loss: the bare class (without parentheses) is not a loss
# object. NOTE: the compile cell passes the string "binary_crossentropy",
# so this object is only displayed here.
loss = tf.keras.losses.BinaryCrossentropy()
loss

In [None]:
# Compile the model: configured optimizer, binary cross-entropy loss,
# and accuracy reported as a metric
model.compile(
    optimizer=opt,
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

In [None]:
# Set up TensorBoard logging under a per-run, timestamped directory
log_directory = f"logs/fit/{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}"
tf_callback = TensorBoard(log_dir=log_directory, histogram_freq=1)

In [None]:
# Set up early stopping: monitor val_loss with a patience of 10 epochs
# and restore the best weights when training stops
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)


In [None]:
# Train the model
# NOTE(review): the test split doubles as the validation set here, so early
# stopping is driven by test data; confirm this is intended.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    callbacks=[tf_callback, early_stopping],
)

In [None]:
# Save the trained model to disk as model.h5
model.save("model.h5")

In [None]:
# Load the TensorBoard notebook extension (registers the %tensorboard magic)

%load_ext tensorboard

In [None]:
%tensorboard --logdir logs/fit20250813-163832/