## Imports

In [1]:
import datetime
import pickle

import pandas as pd
from keras.callbacks import EarlyStopping, TensorBoard
from keras.layers import Dense, Input
from keras.models import Sequential
from keras.optimizers import Adam
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [2]:
# Turn on the TF_ENABLE_ONEDNN_OPTS flag for this kernel's environment.
# NOTE(review): keras is already imported by the cell above; if the backend
# reads this variable at import time, setting it here has no effect --
# confirm, and consider moving this cell ahead of the imports.
%env TF_ENABLE_ONEDNN_OPTS=1

env: TF_ENABLE_ONEDNN_OPTS=1


## Load the dataset

In [3]:
# Read the raw churn table from disk and preview its first rows.
churn_csv_path = '../data/Churn_Modelling.csv'
data = pd.read_csv(churn_csv_path)
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


## Preprocess the data

### Define column transformations

In [4]:
# String-valued columns; the preprocessor one-hot encodes these.
categorical_features = ["Geography", "Gender"]

# Number-valued columns; the preprocessor standard-scales these.
# The order below is the column order of the scaled block.
numeric_features = [
    "CreditScore", "Age", "Tenure", "Balance",
    "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary",
]

In [5]:
# Route each column group to its own transformer. Columns that appear in
# neither feature list are discarded (remainder='drop').
numeric_step = ('num', StandardScaler(), numeric_features)
categorical_step = (
    'cat',
    # drop='first' removes one indicator column per categorical feature.
    OneHotEncoder(drop='first', dtype='int8'),
    categorical_features,
)

preprocessor = ColumnTransformer(
    [numeric_step, categorical_step],
    remainder='drop',
    n_jobs=-1,
)

### Create pipeline

In [6]:
# Wrap the column preprocessor in a single-step Pipeline so it can be
# fitted, applied, and pickled as one object.
pipeline_steps = [('preprocessor', preprocessor)]
pipeline = Pipeline(pipeline_steps)

### Split the data into train and test sets

In [7]:
# Separate the "Exited" label from the remaining columns.
y = data["Exited"]
X = data.drop("Exited", axis=1)

# Hold out 20% of the rows for testing; stratifying on y keeps the label
# proportions the same in both splits, and the fixed seed makes the split
# repeatable.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

### Fit and transform data

In [8]:
# Fit the preprocessing pipeline on the training split only and transform it
# in the same pass. Calling `fit` alone returns the pipeline object itself,
# so its result must not be stored under the name of the feature matrix.
X_train_transformed = pipeline.fit_transform(X_train)

# Reuse the statistics learned from the training split; the test split is
# never used for fitting.
X_test_transformed = pipeline.transform(X_test)

### Save pipeline to pickle

In [9]:
# Persist the fitted preprocessing pipeline with pickle protocol 5.
# Only unpickle this file from a trusted location: loading a pickle can
# execute arbitrary code.
pipeline_path = '../models/preprocessing_pipeline.pkl'
with open(pipeline_path, mode='wb') as pipeline_file:
    pickle.dump(pipeline, pipeline_file, protocol=5)

## Model Training

### Define the Neural Network

In [10]:
# The input width follows the number of columns the preprocessing produced.
n_features = X_train_transformed.shape[1]

# Two ReLU hidden layers (32 then 16 units) feeding a single sigmoid unit.
network_layers = [
    Input(shape=(n_features,)),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid'),
]
model = Sequential(network_layers)

In [11]:
# Print a summary of the network defined above as a quick sanity check
# before compiling and training.
model.summary()

### Compile the model

In [12]:
# Adam with an explicit learning rate of 0.01.
optimizer = Adam(learning_rate=0.01)

# Binary cross-entropy matches the single sigmoid output unit; accuracy is
# tracked as an additional metric during fit/evaluate.
model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=optimizer,
)

### Setup Callbacks

In [13]:
# Each run logs into its own timestamped sub-directory under ../logs/fit/.
run_stamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
log_dir = f"../logs/fit/{run_stamp}"

# Write TensorBoard logs (with histograms every epoch) to log_dir.
tensorboard_cb = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Stop once val_loss has not improved for 10 epochs and roll the model
# back to the weights of its best epoch.
early_stopping_cb = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

callbacks = [tensorboard_cb, early_stopping_cb]

### Train the model

In [14]:
# Train for at most 100 epochs; 20% of the training data is set aside for
# validation, which supplies the val_loss watched by EarlyStopping.
history = model.fit(
    x=X_train_transformed,
    y=y_train,
    epochs=100,
    validation_split=0.2,
    callbacks=callbacks,
)

Epoch 1/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7983 - loss: 0.4660 - val_accuracy: 0.8562 - val_loss: 0.3664
Epoch 2/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8456 - loss: 0.3698 - val_accuracy: 0.8719 - val_loss: 0.3354
Epoch 3/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8504 - loss: 0.3573 - val_accuracy: 0.8694 - val_loss: 0.3337
Epoch 4/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8452 - loss: 0.3622 - val_accuracy: 0.8675 - val_loss: 0.3331
Epoch 5/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8545 - loss: 0.3424 - val_accuracy: 0.8612 - val_loss: 0.3373
Epoch 6/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8517 - loss: 0.3550 - val_accuracy: 0.8700 - val_loss: 0.3355
Epoch 7/100
[1m200/20

### Evaluate on Test set

In [15]:
# Score the trained model on the held-out test split. The returned list is
# [loss, accuracy], following the loss and metrics given to model.compile.
model.evaluate(X_test_transformed, y_test)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8783 - loss: 0.3158


[0.3416936695575714, 0.8634999990463257]

### Save the model

In [16]:
# Write the trained model to ../models/ in the .keras format, next to the
# pickled preprocessing pipeline.
model.save('../models/churn_model.keras')

### Load TensorBoard Extension

In [16]:
# Load the TensorBoard notebook extension and open the dashboard on the
# parent directory that holds every timestamped run.
# NOTE(review): port 8888 is also the Jupyter server's default port --
# confirm it is free here, or switch to another port (TensorBoard's own
# default is 6006).
%load_ext tensorboard
%tensorboard --logdir ../logs/fit/ --host localhost --port 8888

Launching TensorBoard...