In [9]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
import pickle

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam

In [None]:
#### Variables ####

from CONFIG import features, directories

# Directories
log_dir = directories["log_dir"]
model_dir = directories["model_dir"]

# Features
categorical_variables = features["categorical_variables"]
numerical_variables = features["numerical_variables"]
binary_variables =  features["binary_variables"]

#### Importing Data

In [4]:
# Importing data from csv
df = pd.read_csv("Churn_Modelling.csv")
X = df.iloc[:, 3:-1]
y = df.iloc[:, -1]
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [5]:
# Train test split
X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=0.2, random_state=0)

#### Preprocessing

In [6]:
# One hot encoding categorical variables
one_hot_encoder = OneHotEncoder(sparse_output=False)
one_hot_encoder = one_hot_encoder.fit(X_train[categorical_variables])
categorical_X_train_encoded = pd.DataFrame(one_hot_encoder.transform(X_train[categorical_variables]))
categorical_X_test_encoded = pd.DataFrame(one_hot_encoder.transform(X_test[categorical_variables]))

# Min Max scaling numerical variables
min_max_scaler = MinMaxScaler()
min_max_scaler = min_max_scaler.fit(X_train[numerical_variables])
numerical_X_train_standardized = pd.DataFrame(min_max_scaler.transform(X_train[numerical_variables]))
numerical_X_test_standardized = pd.DataFrame(min_max_scaler.transform(X_test[numerical_variables]))

In [7]:
# Saving the encoder and scaler
with open("one_hot_encoder.pkl", "wb") as f:
    pickle.dump(one_hot_encoder, f)

with open("min_max_scaler.pkl", "wb") as f:
    pickle.dump(min_max_scaler, f)

In [24]:
# Combining train and test preprocessed data
X_train_pre = pd.concat([X_train[binary_variables].reset_index(drop=True), categorical_X_train_encoded, numerical_X_train_standardized], axis=1)
X_test_pre = pd.concat([X_test[binary_variables].reset_index(drop=True), categorical_X_test_encoded, numerical_X_test_standardized], axis = 1)

In [11]:
X_train_pre

Unnamed: 0,HasCrCard,IsActiveMember,0,1,2,3,4,0.1,1.1,2.1,3.1,4.1,5
0,1,0,0.0,0.0,1.0,1.0,0.0,0.634,0.228571,0.5,0.000000,0.333333,0.819174
1,1,1,0.0,1.0,0.0,0.0,1.0,0.154,0.342857,0.1,0.317473,0.000000,0.285459
2,1,0,1.0,0.0,0.0,1.0,0.0,0.370,0.157143,0.2,0.471364,0.000000,0.928185
3,1,0,0.0,0.0,1.0,0.0,1.0,0.608,0.314286,0.5,0.443327,0.000000,0.868110
4,1,1,0.0,0.0,1.0,1.0,0.0,1.000,0.557143,0.8,0.531808,0.333333,0.661498
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7995,1,1,0.0,1.0,0.0,1.0,0.0,0.488,0.200000,0.4,0.503696,0.333333,0.814829
7996,1,1,0.0,0.0,1.0,1.0,0.0,0.888,0.057143,0.4,0.480060,0.000000,0.538759
7997,1,0,1.0,0.0,0.0,0.0,1.0,0.776,0.242857,0.5,0.676520,0.333333,0.907178
7998,1,1,0.0,0.0,1.0,1.0,0.0,0.480,0.285714,0.9,0.000000,0.333333,0.743764


#### Neural Net

In [None]:
# Variables
lr = 0.01
num_epochs = 100
loss = BinaryCrossentropy()
metrics = ["accuracy"]

log_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
early_stopping_callback = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

In [None]:
# Building and compiling model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_pre.shape[1], )),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer=Adam(learning_rate=lr), loss = loss, metrics=metrics)
model.summary()

In [33]:
# Training model
history = model.fit(
    X_train_pre,
    y_train,
    validation_data=(X_test_pre, y_test),
    epochs=num_epochs,
    callbacks= [log_callback, early_stopping_callback]
    )

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.8001 - loss: 0.4633 - val_accuracy: 0.8210 - val_loss: 0.4354
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8201 - loss: 0.4271 - val_accuracy: 0.8275 - val_loss: 0.4122
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8379 - loss: 0.3923 - val_accuracy: 0.8615 - val_loss: 0.3606
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8490 - loss: 0.3667 - val_accuracy: 0.8595 - val_loss: 0.3591
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8534 - loss: 0.3614 - val_accuracy: 0.8580 - val_loss: 0.3502
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.8544 - loss: 0.3551 - val_accuracy: 0.8625 - val_loss: 0.3471
Epoch 7/100
[1m250/2

In [36]:
model.save(f"{model_dir}/model_1.keras")

In [38]:
# Load Tensorboard
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
