In [1]:
import dataset_utils as dataset

from Keras import *
import tensorflow as tf
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from dataset_utils import arrange_datasets, train_val_kfold

In [2]:
# Report which accelerator TensorFlow will use. The device name is looked up
# once and reused; a falsy result is treated as "no GPU".
gpu_device = tf.test.gpu_device_name()
if gpu_device:
    print('Default GPU Device: {}'.format(gpu_device))
else:
    print("No GPU available. Using CPU instead.")

# `tf.config.list_physical_devices` is the stable name of the call previously
# reached through `tf.config.experimental`.
print('GPU name: ', tf.config.list_physical_devices('GPU'))


Default GPU Device: /device:GPU:0
GPU name:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
# Load the two ML-CUP23 CSV files: TR becomes the development set and TS the
# blind set (names as used by the rest of the notebook).
dev_data, blind_data = (
    dataset.load_dataset(csv_path)
    for csv_path in ("../data/ML-CUP23-TR.csv", "../data/ML-CUP23-TS.csv")
)

In [4]:

# Single seed shared by every random operation in this notebook.
seed = 18
# Seed the Python, NumPy and TensorFlow generators as well, so that weight
# initialisation and batch shuffling are repeatable, not only the splits below.
tf.keras.utils.set_random_seed(seed)

# Split both datasets into inputs and labels (the helper is also expected to
# scale them - confirm in dataset_utils).
X, y, X_blind = arrange_datasets(dev_data, blind_data)

# Hold out 15% of the development data as an internal test set.
X_dev, X_test, y_dev, y_test = train_test_split(X, y, train_size=0.85, random_state=seed, shuffle=True)
# 5-fold train/validation partitions of the remaining data, consumed by the grid search.
train_folds, val_folds = train_val_kfold(X_dev, y_dev, folds=5, random_state=seed)
# Single 70/30 hold-out split of the same data, used by the individual training runs.
X_train, X_val, y_train, y_val = train_test_split(X_dev, y_dev, test_size=0.3, random_state=seed, shuffle=True)

# Keras

In [5]:
# Hyperparameter search space handed to the grid search: one dict per
# optimizer, with list-valued entries holding the candidate values.
parameters = [
    dict(
        optimizer='SGD',
        learning_rate=[0.005, 0.0025, 0.001, 0.0001],
        weight_decay=[0.0, 0.001, 0.0005, 0.0001],
        momentum=[0.9, 0.75],
        nesterov=[True, False],
    ),
]

In [6]:
# Baseline pair handed to grid() as its threshold; a search result must be
# lower in both positions to replace it.
# NOTE(review): presumably (train loss, validation loss) - confirm against keras_grid_search.
prev_best = (1, 1)

In [7]:
def grid(treshhold, layer_configuration):
    """Run the grid search for one layer configuration and report the winner.

    Args:
        treshhold: pair of baseline values. It is forwarded to
            ``keras_grid_search`` as ``best_values`` and is also the baseline
            the search result must beat in both positions.
        layer_configuration: list of ``(name, number)`` tuples such as
            ``('dense', 200)``, passed to the search as ``model_layers``.

    Returns:
        ``(best_keras_params, best_values)``. ``best_values`` is the search
        result when it is lower than ``treshhold`` in both positions, and
        ``treshhold`` itself otherwise. In a notebook, end the call with ``;``
        to keep the returned tuple from being displayed.
    """
    print(f"Layer configuration: {layer_configuration}")
    best_keras_params, res_values = keras_grid_search(model_builder=keras_mlp, parameters=parameters,
                                                      model_layers=layer_configuration,
                                                      train_data=train_folds, val_data=val_folds,
                                                      verbose=0, max_epochs=200, best_values=treshhold)
    # Accept the search result only if it beats the baseline on both values.
    improved = res_values[0] < treshhold[0] and res_values[1] < treshhold[1]
    new_best = res_values if improved else treshhold

    print(f"Best combo: {best_keras_params}, with values: {new_best}")
    # Hand the outcome back so callers no longer need to copy it from the log.
    return best_keras_params, new_best

In [8]:
# Grid search over a stack of two identical ('dense', 200) entries.
layers = [('dense', 200)] * 2
grid(prev_best, layers)

Layer configuration: [('dense', 200), ('dense', 200)]
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.9, 'nesterov': True}


I0000 00:00:1722261630.067539     140 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Train Loss: mean - 0.07585861012339593 std - 0.022297762719587957, Val Loss: mean - 0.3508461654186249 std - 0.040797248925533526
--------------------------------
New best parameters
--------------------------------
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.9, 'nesterov': False}
Train Loss: mean - 0.09956855773925781 std - 0.020860350639255593, Val Loss: mean - 0.4986847400665283 std - 0.1035445859711427
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.75, 'nesterov': True}
Train Loss: mean - 0.29133234918117523 std - 0.06060715114808233, Val Loss: mean - 0.6007025063037872 std - 0.1852731737193075
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.75, 'nesterov': False}
Train Loss: mean - 0.14393973648548125 std - 0.023217800102151732, Val Loss: mean - 0.30509832203388215 std - 0.049708104089788555
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.001,

In [None]:
# Grid search over a widening stack: 25 -> 50 -> 150.
layers = [('dense', width) for width in (25, 50, 150)]
grid(prev_best, layers)

Layer configuration: [('dense', 25), ('dense', 50), ('dense', 150)]
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.9, 'nesterov': True}
Train Loss: mean - 0.1404857635498047 std - 0.05896111916474787, Val Loss: mean - 0.5533400774002075 std - 0.22615236576521514
--------------------------------
New best parameters
--------------------------------
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.9, 'nesterov': False}
Train Loss: mean - 0.20047926604747773 std - 0.0701104820514447, Val Loss: mean - 0.836604630947113 std - 0.27914232610635864
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.75, 'nesterov': True}
Train Loss: mean - 0.357501482963562 std - 0.15392222829402671, Val Loss: mean - 0.7077323257923126 std - 0.21272417754259665
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.75, 'nesterov': False}
Train Loss: mean - 0.33782474994659

In [None]:
# Single hold-out training run for the 25 -> 50 -> 150 configuration.
model_to_test = keras_mlp([('dense', width) for width in (25, 50, 150)])
#Best combo: {'learning_rate': 0.005, 'weight_decay': 0.0005, 'momentum': 0.9, 'nesterov': True, 'optimizer': 'SGD'}, with values: (0.05887814909219742, 0.43884475231170655)
# NOTE(review): the optimizer below uses lr=0.0045 and no weight decay rather
# than the combo recorded above - confirm this is intentional.
callbacks = [
    # Halve the learning rate when val_loss stops improving.
    ReduceLROnPlateau(
        monitor='val_loss', mode='min',
        factor=0.5, patience=10, cooldown=10,
        min_delta=1e-7, min_lr=1e-7,
        verbose=1,
    ),
    # Early stopping is only active from epoch 100 onwards.
    EarlyStopping(
        monitor='val_loss', min_delta=1e-7,
        patience=20, start_from_epoch=100,
    ),
]
optim = k.optimizers.SGD(learning_rate=0.0045, momentum=0.9, nesterov=True, weight_decay=0.0)
hst = keras_train(
    model_to_test,
    train_data=(X_train, y_train), val_data=(X_val, y_val),
    epochs=350, batch_size=50,
    optimizer=optim, callback=callbacks,
)
plot_keras_history(hst, 20)

In [None]:
# Grid search over a narrowing stack: 150 -> 50 -> 25.
grid(prev_best, [('dense', width) for width in (150, 50, 25)])

In [None]:
# Single hold-out training run for the 150 -> 50 -> 25 configuration.
model_to_test = keras_mlp([('dense', width) for width in (150, 50, 25)])

optim = k.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True, weight_decay=0.0001)

# `callbacks` is not defined in this cell: it is whatever list an earlier cell
# last assigned, so this run depends on execution order.
hst = keras_train(
    model_to_test,
    train_data=(X_train, y_train), val_data=(X_val, y_val),
    epochs=150, batch_size=50,
    optimizer=optim, callback=callbacks,
)

plot_keras_history(hst)


In [None]:
# Grid search over four identical ('dense', 300) entries.
grid(prev_best, [('dense', 300)] * 4)

In [None]:
# Grid search over a five-entry stack: 500, 500, 300, 300, 150.
grid(prev_best, [('dense', width) for width in (500, 500, 300, 300, 150)])

In [None]:
#Best parameters: {'learning_rate': 0.005, 'weight_decay': 0.001, 'momentum': 0.9, 'nesterov': True, 'optimizer': 'SGD'}, with values: (18.03679656982422, 18.47313709259033)
# One ('bn', 1) entry followed by four ('dense', 300) entries.
model_to_test = keras_mlp([('bn', 1)] + [('dense', 300)] * 4)
model_to_test.summary()
callbacks = [
    # Cut the learning rate to a quarter when val_loss stops improving.
    ReduceLROnPlateau(
        monitor='val_loss', mode='min',
        factor=0.25, patience=10, cooldown=20,
        min_delta=1e-7, min_lr=1e-7,
        verbose=1,
    ),
]
history = keras_train(
    model_to_test,
    train_data=(X_train, y_train), val_data=(X_val, y_val),
    epochs=150,
    optimizer=k.optimizers.SGD(learning_rate=0.005, momentum=0.9, nesterov=True, weight_decay=0.001),
    callback=callbacks,
)
plot_keras_history(history)

In [None]:
from tensorflow.keras.callbacks import LearningRateScheduler

# Two ('dense', 350) entries followed by two ('dense', 150) entries.
model_to_test = keras_mlp([('dense', 350)] * 2 + [('dense', 150)] * 2)


def scheduler(epochs, lr):
    """Halve the learning rate every 100 epochs.

    Args:
        epochs: zero-based index of the epoch about to start, as supplied by
            Keras' ``LearningRateScheduler``.
        lr: learning rate currently in use.

    Returns:
        ``lr * 0.5`` on epochs 100, 200, ...; ``lr`` unchanged otherwise.
    """
    # Epoch 0 also satisfies `epochs % 100 == 0`; skip it so that training
    # starts at the learning rate the optimizer was configured with.
    if epochs > 0 and epochs % 100 == 0:
        return lr * 0.5
    return lr


# The step schedule defined by `scheduler` is the only callback for this run.
callbacks = [LearningRateScheduler(scheduler)]
#Best combo: {'learning_rate': 0.001, 'weight_decay': 0.0001, 'momentum': 0.9, 'nesterov': True, 'optimizer': 'SGD'}, with values: (0.0807236298918724, 0.35102823972702024)

optim = k.optimizers.SGD(learning_rate=0.001, momentum=0.9, nesterov=True, weight_decay=0.0001)
hst = keras_train(
    model_to_test,
    train_data=(X_train, y_train), val_data=(X_val, y_val),
    epochs=350,
    optimizer=optim, callback=callbacks,
)

In [None]:
# Plot the history returned by the most recent keras_train call stored in `hst`.
# NOTE(review): the meaning of the second argument (30) is defined by
# plot_keras_history - confirm against its definition.
plot_keras_history(hst, 30)