In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

## Feature Engineering

In [None]:
# Read the churn dataset from a CSV path relative to the working directory
csvPath = "Churn_Modelling.csv"
data = pd.read_csv(csvPath)

In [None]:
# Preview the first rows of the freshly loaded frame to sanity-check its columns
data.head()

In [None]:
# Print pandas' structural summary of the frame (info() writes its report directly)
data.info()

In [None]:
# Show summary statistics for the frame as computed by describe()
data.describe()

In [None]:
# Preprocessing the data
# Remove the RowNumber, CustomerId and Surname columns, which are not used as
# model inputs. errors = 'ignore' keeps the cell safe to re-run: once the
# columns are gone the drop is a no-op instead of raising a KeyError.

data = data.drop(columns = ['RowNumber', 'CustomerId', 'Surname'], errors = 'ignore')

# Preview only the first rows rather than echoing the whole frame
data.head()

In [None]:
# Label-encode the 'Gender' column in place.
# The fitted encoder is kept in genderEncoder so the same mapping can be reused later.
# NOTE(review): re-running this cell alone refits the encoder on the already
# encoded values, so run it only once per fresh load of `data`.

genderEncoder = LabelEncoder()
genderEncoder.fit(data['Gender'])
data['Gender'] = genderEncoder.transform(data['Gender'])
data

In [None]:
# One-hot encode the 'Geography' column

from sklearn.preprocessing import OneHotEncoder

# Fit the encoder first, then transform the same single-column frame;
# geoEncoder holds the encoded matrix, geoOHE the fitted encoder itself.
geoOHE = OneHotEncoder()
geographyColumn = data[['Geography']]
geoOHE.fit(geographyColumn)
geoEncoder = geoOHE.transform(geographyColumn)
geoEncoder

In [None]:
# Column names the fitted encoder generates for the 'Geography' input feature
geoOHE.get_feature_names_out(['Geography'])

In [None]:
# Convert the encoder output to a dense array so the encoded values can be inspected
geoEncoder.toarray()

In [None]:
# Wrap the dense one-hot matrix in a DataFrame labelled with the encoder's
# generated column names. Reusing data's index keeps the rows aligned when the
# two frames are concatenated column-wise, even if data's index is ever
# something other than the default 0..n-1 range.
geoEncodedDF = pd.DataFrame(
    geoEncoder.toarray(),
    columns = geoOHE.get_feature_names_out(['Geography']),
    index = data.index,
)
geoEncodedDF

In [None]:
# Replace the raw 'Geography' column with its one-hot encoded columns.
# NOTE: this cell is meant to run once per fresh load; a second run fails
# because 'Geography' has already been dropped from `data`.

dataWithoutGeo = data.drop(columns = ['Geography'])
data = pd.concat([dataWithoutGeo, geoEncodedDF], axis = 1)
data.head()

In [None]:
# Persist the two fitted encoders (gender label encoder and geography one-hot
# encoder) as pickle files so the same mappings can be reloaded later.

for pklPath, fittedEncoder in (
    ('genderEncoder.pkl', genderEncoder),
    ('geoOHE.pkl', geoOHE),
):
    with open(pklPath, 'wb') as pklFile:
        pickle.dump(fittedEncoder, pklFile)

In [None]:
# Re-check the encoded frame before separating features from the target
data.head()

In [None]:
# Separate the target ('Exited') from the feature columns:
# y is the dependent variable, X holds every remaining column.

targetColumn = 'Exited'
y = data[targetColumn]
X = data.drop(columns = [targetColumn])

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible between runs.

splitOptions = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **splitOptions)

In [None]:
# Inspect the training features as produced by the split (before scaling)
X_train

In [None]:
# Inspect the test features as produced by the split (before scaling)
X_test

In [None]:
# Standardise the features: the scaler is fitted on the training split only and
# the same fitted scaler is then applied to both splits.
# After this cell X_train / X_test hold the scaler's transformed output, so
# re-running the cell alone would refit the scaler on already-scaled values.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Inspect the training features again, now holding the scaler's transformed output
X_train

In [None]:
# Inspect the test features again, now holding the scaler's transformed output
X_test

In [None]:
# Persist the fitted scaler as a pickle file so the same scaling can be reloaded later
scalerPath = 'scaler.pkl'
with open(scalerPath, 'wb') as scalerFile:
    pickle.dump(scaler, scalerFile)

## Artificial Neural Network Implementation

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime

In [None]:
# Define the ANN model
# The input width is declared with an explicit Input object rather than the
# input_shape argument on the first Dense layer, which newer Keras releases
# warn against for Sequential models. The architecture itself is unchanged.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),  # One input per feature column of X_train
    Dense(64, activation='relu'),               # HL1 - Connected with input layer
    Dense(32, activation='relu'),               # HL2 - Connected with HL1
    Dense(1, activation='sigmoid')              # Output Layer
])

model.summary()

In [None]:
# Optimizer and loss objects that are handed to model.compile()
learningRate = 0.01
optimizer = tf.keras.optimizers.Adam(learning_rate = learningRate)
loss = tf.keras.losses.BinaryCrossentropy()

In [None]:
# Compile the model with the optimizer and loss objects defined earlier,
# tracking accuracy as the reported metric.

trackedMetrics = ['accuracy']
model.compile(optimizer = optimizer, loss = loss, metrics = trackedMetrics)

In [None]:
# Build a per-run TensorBoard log folder name from the current timestamp
runTimestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
logDirectory = "logs/fit/" + runTimestamp


In [None]:
# Setup the Tensorboard and Early Stopping
# EarlyStopping and TensorBoard are already imported alongside the other
# TensorFlow imports, so they are not re-imported in this cell.

# Write this run's logs to logDirectory; histogram_freq = 1 requests histogram
# logging every epoch.
tfCallback = TensorBoard(log_dir = logDirectory, histogram_freq = 1)

# Stop training once val_loss has gone 10 epochs without improving, and roll
# the model back to the weights from its best epoch.
earlyStoppingCallback = EarlyStopping(
    monitor = 'val_loss',
    patience = 10,
    restore_best_weights = True
)


In [None]:
# Train the model for up to 100 epochs with the TensorBoard and early-stopping callbacks.
# NOTE(review): the test split is passed as validation data, so any callback
# that monitors validation metrics is effectively tuned on the test set;
# consider carving out a separate validation split.

trainingCallbacks = [tfCallback, earlyStoppingCallback]
history = model.fit(
    x = X_train,
    y = y_train,
    validation_data = (X_test, y_test),
    epochs = 100,
    callbacks = trainingCallbacks,
)

In [None]:
# Persist the trained model to disk so it can be reloaded without retraining

modelPath = 'model.h5'
model.save(modelPath)

In [None]:
# Load the TensorBoard notebook extension (needed before the %tensorboard magic can be used)

%load_ext tensorboard

In [None]:
# Launch TensorBoard inline, pointed at the parent folder that holds the timestamped run logs
%tensorboard --logdir logs/fit