In [125]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pickle

In [None]:
## Read the raw CSV into a DataFrame and preview the first rows
data = pd.read_csv("Churn_Modelling.csv")
data.head()

In [None]:
## Remove the identifier-style columns that are not used as model features
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])
data

In [None]:
## Replace the Gender column with integer labels from a fitted LabelEncoder.
## NOTE: this overwrites data['Gender'] in place, so re-running the cell refits
## the encoder on the already-encoded values instead of the original labels.
label_encoder_gender = LabelEncoder()
label_encoder_gender.fit(data['Gender'])
data['Gender'] = label_encoder_gender.transform(data['Gender'])
data

In [None]:
## Fit a one-hot encoder on the Geography column and encode it
from sklearn.preprocessing import OneHotEncoder

onehot_encoder_geo = OneHotEncoder()
onehot_encoder_geo.fit(data[['Geography']])
geo_encoder = onehot_encoder_geo.transform(data[['Geography']])
geo_encoder

In [None]:
## Column names the fitted encoder produces for the Geography feature
onehot_encoder_geo.get_feature_names_out(input_features=['Geography'])

In [None]:
## Wrap the dense one-hot matrix in a DataFrame with the encoder's column names.
## The frame reuses data's index so that a column-wise concat with `data`
## lines up row-for-row even when that index is not the default 0..n-1 range
## (a mismatched index would otherwise introduce NaN-filled rows).
geo_encoded_df=pd.DataFrame(
    geo_encoder.toarray(),
    index=data.index,
    columns=onehot_encoder_geo.get_feature_names_out(['Geography']),
)
geo_encoded_df

In [None]:
## Swap the raw Geography column for its one-hot encoded columns
data = pd.concat(
    [data.drop(columns='Geography'), geo_encoded_df],
    axis='columns',
)
data.head()

In [133]:
## Persist both fitted encoders to disk, one pickle file each
for pkl_path, fitted_encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_geo.pkl', onehot_encoder_geo),
):
    with open(pkl_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)


In [134]:
## Separate the target column ('Exited') from the feature columns
X = data.drop(columns='Exited')
Y = data['Exited']

## Hold out 20% of the rows for testing; random_state fixes the split
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

## Standardise the features: statistics are learned on the training split only
## and then applied unchanged to the test split
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [135]:
## Persist the fitted scaler to disk
with open('scaler.pkl', mode='wb') as file:
    pickle.dump(scaler, file)

**ANN IMPLEMENTATION**

In [136]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [None]:
##build an ANN model

## Seed the Python, NumPy and TensorFlow random generators before any layer is
## created, so weight initialisation and training shuffles start from the same
## state on every Restart & Run All.
tf.keras.utils.set_random_seed(42)

## Two ReLU hidden layers (64 and 32 units) feeding a single sigmoid output
## unit; the input width is taken from the number of columns in X_train.
model=Sequential([
    Dense(64,activation='relu',input_shape=(X_train.shape[1],)),
    Dense(32,activation='relu'),
    Dense(1,activation='sigmoid')
])

In [None]:
## Print the layer-by-layer summary of the model
model.summary()

In [139]:
import tensorflow

## Adam optimizer with an explicit learning rate of 0.01
opt = tensorflow.keras.optimizers.Adam(learning_rate=0.01)

In [140]:
## Configure training: the `opt` optimizer, binary cross-entropy loss,
## and accuracy reported as the metric
model.compile(
    optimizer=opt,
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

In [141]:
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard

##set up the tensorboard
## Each run logs into its own timestamped sub-directory of logs/fit
## (e.g. logs/fit/20240925-220720). The trailing "/" matters: without it the
## timestamp is glued onto the directory name ("logs/fit2024...") instead of
## becoming a run folder under logs/fit.
log_dir="logs/fit/"+datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorflow_callback=TensorBoard(log_dir=log_dir,histogram_freq=1)

##set up early stopping
## Stop once val_loss has gone 10 epochs without improving, and restore the
## weights from the best epoch.
early_stopping_callback=EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True)

In [None]:
## Fit for at most 100 epochs with the TensorBoard and early-stopping callbacks.
## NOTE(review): the test split is passed as validation data, so early stopping
## is driven by the same rows that would be used for a final evaluation.
history = model.fit(
    x=X_train,
    y=Y_train,
    epochs=100,
    validation_data=(X_test, Y_test),
    callbacks=[tensorflow_callback, early_stopping_callback],
)

In [None]:
## Persist the trained model to 'model.h5' in the working directory
model.save('model.h5')

In [None]:
## Load the TensorBoard notebook extension (makes the %tensorboard magic available)
%load_ext tensorboard

In [None]:
%tensorboard --logdir logs/fit20240925-220720
