In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

: 

In [None]:
## Load the raw churn dataset from the notebook's working directory
data = pd.read_csv("Churn_Modelling.csv")
# Preview the first rows only instead of rendering the whole frame
data.head()

In [None]:
## Preprocessing the data
# Drop irrelevant columns (row number, customer id, surname).
# NOTE: this cell reassigns `data`, so re-running it on the already-trimmed
# frame raises KeyError because the columns are gone.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])
# Preview the first rows only instead of rendering the whole frame
data.head()

In [None]:
## Encode categorical variables
# Fit a LabelEncoder on 'Gender' and overwrite the column with its integer codes.
# The fitted encoder object is kept in `label_encoder_gender` so it can be
# pickled further down.
label_encoder_gender = LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])
# Preview the first rows only instead of rendering the whole frame
data.head()

In [None]:
## One-hot encode 'Geography'
from sklearn.preprocessing import OneHotEncoder

onehot_encoder_geo = OneHotEncoder()
# Fit on the single 'Geography' column and convert the result to a dense array
geo_encoder = (
    onehot_encoder_geo
    .fit_transform(data[['Geography']])
    .toarray()
)
geo_encoder

In [None]:
# Show the output column names the fitted encoder generates for 'Geography';
# the same call supplies the DataFrame column headers in the next cell.
onehot_encoder_geo.get_feature_names_out(['Geography'])

In [None]:
## Wrap the dense one-hot array in a DataFrame with named columns
# index=data.index keeps the rows labelled exactly like `data`, so the
# column-wise pd.concat below lines rows up correctly even if `data` does not
# carry a default 0..n-1 index.
geo_encoded_df = pd.DataFrame(
    geo_encoder,
    columns=onehot_encoder_geo.get_feature_names_out(['Geography']),
    index=data.index,
)
# Preview the first rows only instead of rendering the whole frame
geo_encoded_df.head()

In [None]:
## Combine one-hot encoded columns with original data
# Replace the raw 'Geography' column with its one-hot columns. pd.concat with
# axis=1 matches rows by index label, so both frames must share the same index.
data = pd.concat([data.drop(columns=['Geography']), geo_encoded_df], axis=1)
# Preview the first rows only instead of rendering the whole frame
data.head()

In [None]:
 ## Persist both fitted encoders as pickle files in the working directory
 for pkl_path, fitted_encoder in (
     ('label_encoder_gender.pkl', label_encoder_gender),
     ('one_hot_encoder_geo.pkl', onehot_encoder_geo),
 ):
     # Binary write mode is required by pickle
     with open(pkl_path, 'wb') as file:
         pickle.dump(fitted_encoder, file)
     

In [None]:
## Separate the predictors (X) from the target column 'Exited' (y)
X, y = data.drop(columns='Exited'), data['Exited']

## Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

## Standardise the features: the scaler is fitted on the training rows only
## and then applied unchanged to both the training and the test rows
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [None]:
## Persist the fitted scaler as a pickle file in the working directory
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [None]:
## ANN Implementation

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

import datetime

In [None]:
# Number of feature columns in the scaled training matrix
n_features = X_train.shape[1]
n_features

In [None]:
# Input shape handed to the first layer of the network: a one-element tuple
# holding the feature count (a trailing 0 would describe an empty second axis).
(X_train.shape[1],)

In [None]:
## Build ANN Model

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable on a fresh Restart & Run All (42 matches the
# random_state used for the train/test split).
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Declare the input width with an explicit Input object; passing
    # input_shape= to the first Dense layer is discouraged by current Keras.
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),    ## HL1, connected to the input layer
    Dense(32, activation='relu'),    ## HL2
    Dense(1, activation='sigmoid'),  ## Output layer
])

In [None]:
# Print the summary of the Sequential model built in the previous cell
model.summary()

In [None]:
## Optimizer and loss

# Binary cross-entropy as the training loss; Adam with a 1e-3 learning rate
loss = tf.keras.losses.BinaryCrossentropy()
opt = tf.keras.optimizers.Adam(learning_rate=1e-3)

In [None]:
## Compile the model with the optimizer and loss objects created earlier,
## reporting accuracy as the metric

model.compile(
    optimizer=opt,
    loss=loss,
    metrics=['accuracy'],
)

In [None]:
## Set up TensorBoard logging
# Every run writes into its own timestamped sub-directory under logs/fit/
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp

tensorflow_callback = TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)

In [None]:
## Set up early stopping
# Stop training once the validation loss has gone 5 epochs in a row without
# improving, and restore the weights from the best epoch, so the run does not
# have to go through all the epochs.

early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

In [None]:
## Train the model
# Runs for at most 100 epochs with the TensorBoard and early-stopping callbacks.
# NOTE(review): (X_test, y_test) doubles as the validation set that early
# stopping monitors, so metrics later reported on the test set are not from
# fully unseen data - consider a separate validation split.

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    callbacks=[tensorflow_callback, early_stopping_callback],
)

In [None]:
## Persist the trained model to 'model.keras' in the working directory

model.save(filepath='model.keras')

In [None]:
## Load the TensorBoard notebook extension
%load_ext tensorboard



In [None]:
## Launch TensorBoard on logs/fit, the parent of the timestamped run directories
%tensorboard --logdir logs/fit