In [None]:
import pickle

import pandas as pd

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [None]:
!pip install --upgrade keras

import keras

print(keras.__version__)

In [None]:
## Load the dataset
import os

# The CSV location is machine-specific. Allow it to be overridden through the
# CHURN_DATA_PATH environment variable; the original path stays as the default
# so existing setups keep working unchanged.
DATA_PATH = os.environ.get(
    "CHURN_DATA_PATH",
    "/Users/sunnythesage/PythonProjects/Data-Science-BootCamp/03-Deep-Learning-BootCamp/7 - End to End Deep Learning Project Using ANN/advanced-customer-churn-analysis-using-ann/data/raw/churn-modelling-dataset.csv",
)

data = pd.read_csv(DATA_PATH)

# Preview the first rows of the loaded frame.
data.head()

### Data Preprocessing

In [None]:
# Drop irrelevant columns
irrelevant_columns = ['RowNumber', 'CustomerId', 'Surname']

# NOTE: `data` is rebound here, so re-running this cell without reloading the
# CSV fails because the columns are already gone.
data = data.drop(columns = irrelevant_columns)
data

In [None]:
## Encode the categorical 'Gender' column as integer labels

# Fit the encoder on the raw column first, then overwrite the column with the
# encoded values. NOTE: because the column is overwritten, re-running this cell
# would refit the encoder on the already-encoded values.
label_encoder_gender = LabelEncoder().fit(data['Gender'])

data['Gender'] = label_encoder_gender.transform(data['Gender'])
data

In [None]:
## One-hot encode the 'Geography' column
from sklearn.preprocessing import OneHotEncoder

onehot_encoder_geo = OneHotEncoder()

# Fit on the single-column frame, then densify the encoder output with
# .toarray() so it can be wrapped in a DataFrame later.
geo_fitted = onehot_encoder_geo.fit_transform(data[['Geography']])
geo_encoder = geo_fitted.toarray()
geo_encoder

In [None]:
# Inspect the output column names the fitted encoder generates for 'Geography'.
onehot_encoder_geo.get_feature_names_out(['Geography'])

In [None]:
# Wrap the dense one-hot array in a DataFrame with readable column names.
# The frame reuses `data`'s index so that a later column-wise concat with `data`
# lines the rows up one-to-one even if `data` does not carry a default
# 0..n-1 index (pd.concat along columns aligns on index labels).
geo_feature_names = onehot_encoder_geo.get_feature_names_out(['Geography'])
geo_encoded_df = pd.DataFrame(geo_encoder, columns = geo_feature_names, index = data.index)
geo_encoded_df

In [None]:
## Replace the raw 'Geography' column with its one-hot encoded columns
data_without_geo = data.drop(columns = 'Geography')

# NOTE: `data` is rebound, so this cell cannot be re-run on its own output
# ('Geography' no longer exists after the first run).
data = pd.concat([data_without_geo, geo_encoded_df], axis = 'columns')
data.head()

In [None]:
## Save the fitted encoders (the scaler is pickled in a later cell, once it has been fitted)
encoders_to_save = {
    'label_encoder_gender.pkl': label_encoder_gender,
    'onehot_encoder_geo.pkl': onehot_encoder_geo,
}

# Write each encoder to its own pickle file in the current working directory.
for pickle_name, encoder in encoders_to_save.items():
    with open(pickle_name, 'wb') as encoder_file:
        pickle.dump(encoder, encoder_file)



In [None]:
# Preview the frame after the encoding steps above.
data.head()

In [None]:
## Separate the dependent feature ('Exited') from the independent features
y = data['Exited']
X = data.drop(columns = 'Exited')

## Hold out 20% of the rows as the test set; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size = 0.2,
    random_state = 42,
)

## Scale the features: fit on the training split only, then apply to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)



In [None]:
# Display the scaled training features produced by the scaler above.
X_train

In [None]:
# Persist the scaler fitted on the training split so the same scaling can be reused later.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [None]:
# Display the fully preprocessed (encoded, unscaled) frame.
data

### ANN Implementation

In [None]:
from keras import layers

In [None]:
from keras import Sequential
import datetime

In [None]:
# Per-sample input shape for the network: a 1-tuple holding the number of columns in X_train.
(X_train.shape[1],)

In [None]:
## Build Our ANN Model
# Architecture: Input -> Dense(64, relu) -> Dense(32, relu) -> Dense(1, sigmoid).
# The input width is declared with an explicit Input layer instead of passing
# `input_shape=` to the first Dense layer, which current Keras releases deprecate
# for Sequential models.
model = Sequential([
    layers.Input(shape = (X_train.shape[1],)),  ## one input per column of X_train
    layers.Dense(64, activation = 'relu'),  ## HL1 Connected with input layer
    layers.Dense(32, activation = 'relu'),  ## HL2
    layers.Dense(1, activation = 'sigmoid'),  ## output layer
])

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Adam optimizer with an explicit learning rate of 0.01.
opt = keras.optimizers.Adam(learning_rate = 0.01)
# Binary cross-entropy loss object, created with its default arguments.
loss = keras.losses.BinaryCrossentropy()
# Display the loss object as the cell output.
loss

In [None]:
## compile the model
# Reuse the `opt` and `loss` objects configured in the previous cell so the
# training configuration lives in one place (the `loss` object was previously
# created but never used; a separate string spec was passed instead).
model.compile(optimizer = opt, loss = loss, metrics = ['accuracy'])

In [None]:
## Set up the TensorBoard callback

# Each run logs into its own timestamped sub-directory under logs/fit/.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp

tensorflow_callback = keras.callbacks.TensorBoard(
    log_dir = log_dir,
    histogram_freq = 1,
)

In [None]:
## Set up Early Stopping
# Watches the validation loss with a patience of 10 epochs and asks Keras to
# restore the best weights seen during training.
early_stopping_callback = keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    patience = 10,
    restore_best_weights = True,
)



In [None]:
### Train the model
# Up to 100 epochs; the TensorBoard and early-stopping callbacks are attached.
# NOTE(review): the held-out test split is passed as validation data, and early
# stopping monitors 'val_loss', so the test set influences which weights are
# kept — consider a separate validation split if an unbiased test score is needed.
history = model.fit(
    X_train,
    y_train,
    validation_data = (X_test, y_test),
    epochs = 100,
    callbacks = [tensorflow_callback, early_stopping_callback],
)

In [None]:
# Save the trained model to 'model.h5' in the current working directory.
# NOTE(review): recent Keras releases reportedly treat the .h5 (HDF5) format as
# legacy and recommend '.keras' — confirm, and check for other code that loads
# 'model.h5', before changing the filename.
model.save('model.h5')

In [None]:
## Load the TensorBoard notebook extension (needed before the %tensorboard magic in the next cell)
%load_ext tensorboard

In [None]:
# Open TensorBoard on logs/fit, the parent directory of the timestamped run folders written during training.
%tensorboard --logdir logs/fit