In [None]:
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder

In [None]:
# Load the churn dataset.
# The file location can be overridden with the CHURN_DATA_PATH environment
# variable so the notebook is not tied to one machine's directory layout;
# when the variable is unset the original hard-coded path is used unchanged.
DATA_PATH=os.environ.get(
    "CHURN_DATA_PATH",
    r"D:\DS ML NLP bootcamp\ANN-project\Churn_Modelling.csv",
)
df=pd.read_csv(DATA_PATH)
df.head(5)

#### Preprocessing
- Drop Irrelevant Information
- Categorical variables to Numeric (Encoding)

In [None]:
# Columns treated as irrelevant information and removed from the frame
columns=["RowNumber", "CustomerId", "Surname"]
df=df.drop(columns=columns)

In [None]:
# Encode the categorical Gender column as integer labels.
# The fitted encoder object is kept in its own variable because it is
# pickled further down for reuse.
label_encoder_gender=LabelEncoder().fit(df['Gender'])
df['Gender']=label_encoder_gender.transform(df['Gender'])

In [None]:
# One-hot encode Geography.
# Double brackets select a one-column DataFrame (2-D input) rather than a Series.
onehot_encoder_geography=OneHotEncoder().fit(df[['Geography']])
geo_encoder=onehot_encoder_geography.transform(df[['Geography']])
geo_encoder

In [None]:
# Turn the encoded Geography matrix into a labelled DataFrame.
# The frame reuses df's index so that a later column-wise concat with df
# aligns row-for-row even if df does not carry a default 0..n-1 index.
geo_column_names=onehot_encoder_geography.get_feature_names_out(['Geography'])
geo_encoded_df=pd.DataFrame(
    geo_encoder.toarray(),  # densify the encoder output before building the frame
    columns=geo_column_names,
    index=df.index,
)
geo_encoded_df

In [None]:
# Replace the raw Geography column with its one-hot encoded columns
df_without_geo=df.drop(columns='Geography')
df=pd.concat([df_without_geo, geo_encoded_df], axis='columns')
df.head()

#### All features in the DataFrame are now in numerical format

In [None]:
# Separate the target column ('Exited') from the predictor columns
y=df['Exited']
X=df.drop(columns='Exited')

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train,X_test,y_train,y_test=train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then transform both splits.
# Note: X_train itself keeps the raw values (the scaled copy is X_train_scaled),
# whereas X_test is overwritten with its scaled version.
scaler=StandardScaler().fit(X_train)
X_train_scaled=scaler.transform(X_train)
X_test=scaler.transform(X_test)

In [None]:
# Persist the fitted encoders and scaler so they can be reloaded for prediction
# and further usage. Files are written in the same order as listed here.
artifacts_to_save={
    'label_encoder_gender.pkl': label_encoder_gender,
    'onehot_encoder_geography.pkl': onehot_encoder_geography,
    'scaler.pkl': scaler,
}
for artifact_filename, artifact in artifacts_to_save.items():
    with open(artifact_filename, 'wb') as file:
        pickle.dump(artifact, file)

## ANN Implementation

In [None]:
import datetime
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

In [None]:
# Seed the random number generators so weight initialisation is repeatable
# across kernel restarts (same seed value as the train/test split).
tf.keras.utils.set_random_seed(42)

# Feed-forward binary classifier.
# The input width is declared with an explicit Input object rather than the
# `input_shape=` keyword on the first Dense layer, which newer Keras versions
# warn against.
model=Sequential([
    tf.keras.Input(shape=(X_train.shape[1], )), # one input per feature column
    Dense(64, activation='relu'), # hidden layer 1 (connected with input layer)
    Dense(32, activation='relu'), # hidden layer 2
    Dense(1, activation='sigmoid') # output layer
])

In [None]:
# Print a summary of the layers of the model defined above.
model.summary()

In [None]:
# Compile the model: binary cross-entropy loss, Adam optimiser with a
# learning rate of 0.01, and accuracy reported as the metric.
loss=tf.keras.losses.BinaryCrossentropy()
opt=tf.keras.optimizers.Adam(learning_rate=0.01)

model.compile(
    loss=loss,
    optimizer=opt,
    metrics=['accuracy'],
)

In [29]:
# TensorBoard logging: every run writes into its own timestamped
# sub-directory under logs/fit/
run_stamp=datetime.datetime.now().strftime("%Y-%m-%d-%H%M%S")
log_dir="logs/fit/" + run_stamp
tensorflow_callback=TensorBoard(histogram_freq=1, log_dir=log_dir)

In [None]:
# Early stopping: watch the validation loss with a patience of 5 epochs and
# restore the best weights when training is stopped.
early_stopping_callback=EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)

In [None]:
# Train the model on the standardised training features.
# X_test holds scaled values (it was overwritten by scaler.transform), so the
# training input must be X_train_scaled rather than the raw X_train; otherwise
# the model is fitted and validated on data with different scales.
history=model.fit(
    X_train_scaled, y_train, validation_data=(X_test, y_test), epochs=100,
    callbacks=[tensorflow_callback, early_stopping_callback]
)

In [None]:
# Save the model to "model.h5" (relative path, so it lands in the current working directory).
model.save("model.h5")

In [None]:
# Load the TensorBoard notebook extension; this has to run before the
# %tensorboard magic can be used.
%load_ext tensorboard

In [None]:
# Point TensorBoard at the parent log directory, under which the timestamped
# run directory created for the TensorBoard callback lives.
%tensorboard --logdir logs/fit