# Titanic Competition - Deep Learning


In [1]:
# Hyperparameters
EPOCHS = 100
LR = 0.01
BATCH_SIZE = 64
DROPOUT = 0.3
L2 = 0.01
MOMENTUM = 0.9
DECAY = 0.01

In [2]:
# Setup plotting
import matplotlib.pyplot as plt

plt.style.use("seaborn-v0_8-whitegrid")
# Set Matplotlib defaults
plt.rc("figure", autolayout=True)
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=18,
    titlepad=10,
)
plt.rc("animation", html="html5")

import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import KFold

from statistics import mean

import os

from utils import (
    preprocess_data,
    MyModel,
    model_init,
)

## Preprocessing


In [3]:
# Read the data
train_data = pd.read_csv("../input/train.csv").set_index("PassengerId")
test_data = pd.read_csv("../input/test.csv").set_index("PassengerId")

X, y, X_test = preprocess_data(
    train_data,
    test_data,
    label_value="Survived",
    cols_to_drop=["Name", "Ticket", "Cabin"],
)

## Define the model


In [4]:
input_shape = [X.shape[1]]
model = MyModel()

## Start testing


Parameters to Tune:

- L2 regularization strength: 0.001, 0.005, 0.01, 0.05
- Dropout rate: 0.2, 0.3, 0.4, 0.5
- Learning rate: 0.01, 0.001, 0.0001
- Momentum: 0.8, 0.9
- Weight decay: 1e-4, 1e-5
  - Learning rate scheduler: Experiment with different decay schedules or use adaptive learning rates (e.g., ReduceLROnPlateau)
- Batch size: 16, 32, 64


In [5]:
# Define the K-Fold cross-validator (K=5 in this example)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# DataFrame that saves the parameters with accuracies
accuracies_df = pd.DataFrame(
    columns=[
        "learning_rate",
        "batch_size",
        "l2_regularization",
        "dropout_rate",
        "momentum",
        "weight_decay",
        "accuracy",
    ]
)

## Key parameter tuning

learning_rate, batch_size


In [6]:
lr_values = [0.1, 0.05, 0.01, 0.005, 0.001]
batch_values = [32, 64]

num_iter = len(lr_values) * len(batch_values)

i = 1

for lr in lr_values:
    for batch in batch_values:
        acc_list = []
        print(f"----------------------- {i}/{num_iter} -----------------------")
        for train_index, val_index in kf.split(X):
            # Split the data into training and testing sets
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            # Init the model in every iteration
            model, early_stopping, lrs = model_init(
                MyModel(l2=L2, dropout=DROPOUT), lr, MOMENTUM, DECAY
            )

            history = model.fit(
                X_train,
                y_train,
                validation_data=(X_val, y_val),
                batch_size=batch,
                epochs=EPOCHS,
                callbacks=[early_stopping, lrs],
                verbose=0,
            )

            acc_list.append(history.history["binary_accuracy"][-1])

        print(f"learning_rate: {lr}\t batch_size: {batch}\t accuracy: {mean(acc_list)}")

        # Create a DataFrame for the new row
        new_row = pd.DataFrame(
            [[lr, batch, mean(acc_list)]],
            columns=[
                "learning_rate",
                "batch_size",
                "accuracy",
            ],
        )

        # Append the row using pd.concat()
        accuracies_df = pd.concat([accuracies_df, new_row], ignore_index=True)

        i += 1

----------------------- 1/10 -----------------------
learning_rate: 0.1	 batch_size: 32	 accuracy: 0.8268827676773072
----------------------- 2/10 -----------------------


  accuracies_df = pd.concat([accuracies_df, new_row], ignore_index=True)


learning_rate: 0.1	 batch_size: 64	 accuracy: 0.8324901223182678
----------------------- 3/10 -----------------------
learning_rate: 0.05	 batch_size: 32	 accuracy: 0.8277254581451416
----------------------- 4/10 -----------------------
learning_rate: 0.05	 batch_size: 64	 accuracy: 0.8383834719657898
----------------------- 5/10 -----------------------
learning_rate: 0.01	 batch_size: 32	 accuracy: 0.8417491436004638
----------------------- 6/10 -----------------------
learning_rate: 0.01	 batch_size: 64	 accuracy: 0.828836464881897
----------------------- 7/10 -----------------------
learning_rate: 0.005	 batch_size: 32	 accuracy: 0.8316497802734375
----------------------- 8/10 -----------------------
learning_rate: 0.005	 batch_size: 64	 accuracy: 0.8319294929504395
----------------------- 9/10 -----------------------
learning_rate: 0.001	 batch_size: 32	 accuracy: 0.8288451194763183
----------------------- 10/10 -----------------------
learning_rate: 0.001	 batch_size: 64	 accuracy

In [7]:
accuracies_df.sort_values(by="accuracy", ascending=False)

Unnamed: 0,learning_rate,batch_size,l2_regularization,dropout_rate,momentum,weight_decay,accuracy
4,0.01,32,,,,,0.841749
3,0.05,64,,,,,0.838383
1,0.1,64,,,,,0.83249
7,0.005,64,,,,,0.831929
6,0.005,32,,,,,0.83165
9,0.001,64,,,,,0.829122
8,0.001,32,,,,,0.828845
5,0.01,64,,,,,0.828836
2,0.05,32,,,,,0.827725
0,0.1,32,,,,,0.826883


In [8]:
accuracies_df.to_csv("output/accuracies_tuning.csv")

Extract the parameters giving the highest value


In [9]:
accuracies_df_max_row = accuracies_df.loc[
    accuracies_df["accuracy"] == max(accuracies_df["accuracy"])
]
best_lr = accuracies_df_max_row["learning_rate"].iloc[0]
best_batch = accuracies_df_max_row["batch_size"].iloc[0]

## Fine-tuning

### First stage

l2_regularization, dropout_rate


In [10]:
# DataFrame that saves the parameters with accuracies for the first fine-tuning
accuracies_ft1_df = pd.DataFrame(
    columns=[
        "learning_rate",
        "batch_size",
        "l2_regularization",
        "dropout_rate",
        "momentum",
        "weight_decay",
        "accuracy",
    ]
)

In [11]:
l2_values = [0.005, 0.008, 0.01]
dropout_values = [0.1, 0.15, 0.2, 0.25]

num_iter = len(l2_values) * len(dropout_values)

i = 1

for l2 in l2_values:
    for dropout in dropout_values:
        acc_list = []
        print(f"----------------------- {i}/{num_iter} -----------------------")
        for train_index, val_index in kf.split(X):
            # Split the data into training and testing sets
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            # Init the model in every iteration
            model, early_stopping, lrs = model_init(
                MyModel(l2=l2, dropout=dropout), best_lr, MOMENTUM, DECAY
            )

            history = model.fit(
                X_train,
                y_train,
                validation_data=(X_val, y_val),
                batch_size=best_batch,
                epochs=EPOCHS,
                callbacks=[early_stopping, lrs],
                verbose=0,
            )

            acc_list.append(history.history["binary_accuracy"][-1])

        print(
            f"l2_regularization: {l2}\t dropout_rate: {dropout}\t accuracy: {mean(acc_list)}"
        )

        # Create a DataFrame for the new row
        new_row = pd.DataFrame(
            [[best_lr, best_batch, l2, dropout, mean(acc_list)]],
            columns=[
                "learning_rate",
                "batch_size",
                "l2_regularization",
                "dropout_rate",
                "accuracy",
            ],
        )

        # Append the row using pd.concat()
        accuracies_ft1_df = pd.concat([accuracies_ft1_df, new_row], ignore_index=True)

        i += 1

----------------------- 1/12 -----------------------
l2_regularization: 0.005	 dropout_rate: 0.1	 accuracy: 0.855497419834137
----------------------- 2/12 -----------------------


  accuracies_ft1_df = pd.concat([accuracies_ft1_df, new_row], ignore_index=True)


l2_regularization: 0.005	 dropout_rate: 0.15	 accuracy: 0.8392257690429688
----------------------- 3/12 -----------------------
l2_regularization: 0.005	 dropout_rate: 0.2	 accuracy: 0.8448346972465515
----------------------- 4/12 -----------------------
l2_regularization: 0.005	 dropout_rate: 0.25	 accuracy: 0.8456789493560791
----------------------- 5/12 -----------------------
l2_regularization: 0.008	 dropout_rate: 0.1	 accuracy: 0.845396089553833
----------------------- 6/12 -----------------------
l2_regularization: 0.008	 dropout_rate: 0.15	 accuracy: 0.8425934076309204
----------------------- 7/12 -----------------------
l2_regularization: 0.008	 dropout_rate: 0.2	 accuracy: 0.8439955353736878
----------------------- 8/12 -----------------------
l2_regularization: 0.008	 dropout_rate: 0.25	 accuracy: 0.8428683996200561
----------------------- 9/12 -----------------------
l2_regularization: 0.01	 dropout_rate: 0.1	 accuracy: 0.841187345981598
----------------------- 10/12 ------

In [12]:
accuracies_ft1_df.sort_values(by="accuracy", ascending=False)

Unnamed: 0,learning_rate,batch_size,l2_regularization,dropout_rate,momentum,weight_decay,accuracy
0,0.01,32,0.005,0.1,,,0.855497
9,0.01,32,0.01,0.15,,,0.847081
3,0.01,32,0.005,0.25,,,0.845679
4,0.01,32,0.008,0.1,,,0.845396
2,0.01,32,0.005,0.2,,,0.844835
11,0.01,32,0.01,0.25,,,0.844555
6,0.01,32,0.008,0.2,,,0.843996
7,0.01,32,0.008,0.25,,,0.842868
5,0.01,32,0.008,0.15,,,0.842593
8,0.01,32,0.01,0.1,,,0.841187


In [13]:
accuracies_ft1_df.to_csv("output/accuracies_fine_tuning_1.csv")

Extract the parameters giving the highest value


In [14]:
accuracies_df_max_row = accuracies_ft1_df.loc[
    accuracies_ft1_df["accuracy"] == max(accuracies_ft1_df["accuracy"])
]
best_l2 = accuracies_df_max_row["l2_regularization"].iloc[0]
best_dropout = accuracies_df_max_row["dropout_rate"].iloc[0]

## Fine-tuning

### Second stage

momentum, weight_decay


In [15]:
# DataFrame that saves the parameters with accuracies for the first fine-tuning
accuracies_ft2_df = pd.DataFrame(
    columns=[
        "learning_rate",
        "batch_size",
        "l2_regularization",
        "dropout_rate",
        "momentum",
        "weight_decay",
        "accuracy",
    ]
)

In [16]:
momentum_values = [0.85, 0.88, 0.9]
decay_values = [1e-5, 5e-5, 1e-4, 5e-4]

num_iter = len(momentum_values) * len(decay_values)

i = 1

for momentum in momentum_values:
    for decay in decay_values:
        acc_list = []
        print(f"----------------------- {i}/{num_iter} -----------------------")
        for train_index, val_index in kf.split(X):
            # Split the data into training and testing sets
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            # Init the model in every iteration
            model, early_stopping, lrs = model_init(
                MyModel(l2=best_l2, dropout=best_dropout), best_lr, momentum, decay
            )

            history = model.fit(
                X_train,
                y_train,
                validation_data=(X_val, y_val),
                batch_size=best_batch,
                epochs=EPOCHS,
                callbacks=[early_stopping, lrs],
                verbose=0,
            )

            acc_list.append(history.history["binary_accuracy"][-1])

        print(
            f"momentum: {momentum}\t weight_decay: {decay}\t accuracy: {mean(acc_list)}"
        )

        # Create a DataFrame for the new row
        new_row = pd.DataFrame(
            [
                [
                    best_lr,
                    best_batch,
                    best_l2,
                    best_dropout,
                    momentum,
                    decay,
                    mean(acc_list),
                ]
            ],
            columns=[
                "learning_rate",
                "batch_size",
                "l2_regularization",
                "dropout_rate",
                "momentum",
                "weight_decay",
                "accuracy",
            ],
        )

        # Append the row using pd.concat()
        accuracies_ft2_df = pd.concat([accuracies_ft2_df, new_row], ignore_index=True)
        
        i += 1

----------------------- 1/12 -----------------------
momentum: 0.85	 weight_decay: 1e-05	 accuracy: 0.8504436135292053
----------------------- 2/12 -----------------------


  accuracies_ft2_df = pd.concat([accuracies_ft2_df, new_row], ignore_index=True)


momentum: 0.85	 weight_decay: 5e-05	 accuracy: 0.845396876335144
----------------------- 3/12 -----------------------
momentum: 0.85	 weight_decay: 0.0001	 accuracy: 0.8484800696372986
----------------------- 4/12 -----------------------
momentum: 0.85	 weight_decay: 0.0005	 accuracy: 0.8501619219779968
----------------------- 5/12 -----------------------
momentum: 0.88	 weight_decay: 1e-05	 accuracy: 0.8597034096717835
----------------------- 6/12 -----------------------
momentum: 0.88	 weight_decay: 5e-05	 accuracy: 0.8515668153762818
----------------------- 7/12 -----------------------
momentum: 0.88	 weight_decay: 0.0001	 accuracy: 0.8563365817070008
----------------------- 8/12 -----------------------
momentum: 0.88	 weight_decay: 0.0005	 accuracy: 0.8532553553581238
----------------------- 9/12 -----------------------
momentum: 0.9	 weight_decay: 1e-05	 accuracy: 0.8476413249969482
----------------------- 10/12 -----------------------
momentum: 0.9	 weight_decay: 5e-05	 accuracy:

In [17]:
accuracies_ft2_df.sort_values(by="accuracy", ascending=False)

Unnamed: 0,learning_rate,batch_size,l2_regularization,dropout_rate,momentum,weight_decay,accuracy
4,0.01,32,0.005,0.1,0.88,1e-05,0.859703
9,0.01,32,0.005,0.1,0.9,5e-05,0.858024
6,0.01,32,0.005,0.1,0.88,0.0001,0.856337
7,0.01,32,0.005,0.1,0.88,0.0005,0.853255
11,0.01,32,0.005,0.1,0.9,0.0005,0.853252
10,0.01,32,0.005,0.1,0.9,0.0001,0.852409
5,0.01,32,0.005,0.1,0.88,5e-05,0.851567
0,0.01,32,0.005,0.1,0.85,1e-05,0.850444
3,0.01,32,0.005,0.1,0.85,0.0005,0.850162
2,0.01,32,0.005,0.1,0.85,0.0001,0.84848


In [18]:
accuracies_ft2_df.to_csv("output/accuracies_fine_tuning_2.csv")

Extract the parameters giving the highest value


In [19]:
accuracies_df_max_row = accuracies_ft2_df.loc[
    accuracies_ft2_df["accuracy"] == max(accuracies_ft2_df["accuracy"])
]
best_momentum = accuracies_df_max_row["momentum"].iloc[0]
best_decay = accuracies_df_max_row["weight_decay"].iloc[0]

## Submit prediction


In [20]:
# Fit model
model, early_stopping, lrs = model_init(
    MyModel(l2=best_l2, dropout=best_dropout), best_lr, best_momentum, best_decay
)
model.fit(
    X,
    y,
    batch_size=best_batch,
    epochs=EPOCHS,
    callbacks=[early_stopping, lrs],
)

# Get predictions
y_pred = (model.predict(X_test) > 0.5).astype(np.intc)
predictions = np.concatenate(y_pred)

# Saving the predictions
output = pd.DataFrame({"PassengerId": test_data.index, "Survived": predictions})
if not os.path.isdir("output/"):
    os.mkdir("output/")
output.to_csv("output/submission.csv", index=False)

print("Your submission was successfully saved!")

Epoch 1/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - binary_accuracy: 0.7202 - loss: 2.0372 - learning_rate: 0.0100
Epoch 2/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.7923 - loss: 1.8043 - learning_rate: 0.0100
Epoch 3/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - binary_accuracy: 0.7755 - loss: 1.7380 - learning_rate: 0.0100
Epoch 4/100


  current = self.get_monitor_value(logs)
  callback.on_epoch_end(epoch, logs)


[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - binary_accuracy: 0.8492 - loss: 1.6227 - learning_rate: 0.0100
Epoch 5/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - binary_accuracy: 0.8229 - loss: 1.6124 - learning_rate: 0.0100
Epoch 6/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - binary_accuracy: 0.8351 - loss: 1.5278 - learning_rate: 0.0100
Epoch 7/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.7984 - loss: 1.4886 - learning_rate: 0.0100
Epoch 8/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - binary_accuracy: 0.8308 - loss: 1.4298 - learning_rate: 0.0100
Epoch 9/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.8209 - loss: 1.4020 - learning_rate: 0.0100
Epoch 10/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - binary_accuracy: 0.8032 