In [1]:
import dataset_utils as dataset
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from Keras import *

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from dataset_utils import arange_datasets, train_val_kfold

In [2]:
# Load the two ML-CUP23 CSV files through the project helper.
# "TR" is used below as the development set and "TS" as the blind set
# (presumably the blind set carries no targets — TODO confirm in dataset_utils).
dev_data = dataset.load_dataset("../data/ML-CUP23-TR.csv")
blind_data = dataset.load_dataset("../data/ML-CUP23-TS.csv")

In [3]:

# One seed shared by every split below, so the partitions are repeatable.
seed = 18

# Turn the raw development / blind tables into model inputs and targets.
# (Per the original note this step also scales the data; the helper lives in
# dataset_utils and is not shown here — TODO confirm.)
X, y, X_blind = arange_datasets(dev_data, blind_data)

# Hold out 15% of the development data as an internal test set.
X_dev, X_test, y_dev, y_test = train_test_split(
    X, y,
    train_size=0.85,
    shuffle=True,
    random_state=seed,
)

# 5-fold train/validation partitions of the remaining data, used by the grid search.
train_folds, val_folds = train_val_kfold(X_dev, y_dev, folds=5, random_state=seed)

# A single 70/30 hold-out split of the same data, used by the one-off training runs.
X_train, X_val, y_train, y_val = train_test_split(
    X_dev, y_dev,
    test_size=0.3,
    shuffle=True,
    random_state=seed,
)

# Keras

In [4]:
# Hyper-parameter search space passed to keras_grid_search (one dict per optimizer).
# 'optimizer' is a plain string, every other entry is a list of candidate values.
# Key order is kept as-is because the logged runs iterate in this order.
parameters = [
    dict(
        optimizer='SGD',
        learning_rate=[0.005, 0.0025, 0.001, 0.0001],
        weight_decay=[0.0, 0.001, 0.0005, 0.0001],
        momentum=[0.9, 0.75],
        nesterov=[True, False],
    )
]

In [5]:
# Starting threshold handed to grid(): a pair of values a configuration has to beat.
# Presumably (train loss, val loss), matching the "Train Loss / Val Loss" log lines — TODO confirm.
prev_best = (1, 1)

In [6]:
def grid(treshhold, layer_configuration):
    """Run the grid search for one layer configuration and report the winner.

    The search space is the notebook-level ``parameters`` list and the data are
    the notebook-level ``train_folds`` / ``val_folds``.

    Args:
        treshhold: pair of values the search has to beat; forwarded to
            ``keras_grid_search`` as ``best_values``. The (misspelt) name is
            kept so that existing keyword callers keep working.
        layer_configuration: layer specification forwarded to the model builder
            as ``model_layers``, e.g. ``[('dense', 200), ('dense', 200)]``.

    Returns:
        tuple: ``(best_keras_params, new_best)``. ``new_best`` is the pair
        returned by the search when both of its entries are strictly below the
        corresponding entries of ``treshhold``; otherwise it is ``treshhold``
        itself. Previously nothing was returned, so the result could not be fed
        back as the threshold of the next search.
    """
    print(f"Layer configuration: {layer_configuration}")
    best_keras_params, res_values = keras_grid_search(model_builder=keras_mlp, parameters=parameters,
                                                      model_layers=layer_configuration,
                                                      train_data=train_folds, val_data=val_folds,
                                                      verbose=0, max_epochs=200, best_values=treshhold)

    # Only accept the search result when it improves on BOTH entries of the threshold.
    improved = res_values[0] < treshhold[0] and res_values[1] < treshhold[1]
    new_best = res_values if improved else treshhold

    print(f"Best combo: {best_keras_params}, with values: {new_best}")
    return best_keras_params, new_best

In [8]:
# Grid search for a two-entry configuration: ('dense', 200) twice.
# The spec format is interpreted by keras_mlp (not shown here).
layers = [
        ('dense', 200),
        ('dense', 200)
    ]
grid(prev_best, layers)

Layer configuration: [('dense', 200), ('dense', 200)]
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.9, 'nesterov': True}


I0000 00:00:1705489112.006414 1564874 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Train Loss: 0.05067068189382553, Val Loss: 0.2952613592147827
--------------------------------
New best parameters
--------------------------------
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.9, 'nesterov': False}
Train Loss: 0.051228144019842145, Val Loss: 0.42931469678878786
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.75, 'nesterov': True}
Train Loss: 0.15304337292909623, Val Loss: 0.335204017162323
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.75, 'nesterov': False}
Train Loss: 0.1232564851641655, Val Loss: 0.304096919298172
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.001, 'momentum': 0.9, 'nesterov': True}
Train Loss: 0.06071796342730522, Val Loss: 0.32038230895996095
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.001, 'momentum': 0.9, 'nesterov': False}
Train Loss: 0.04753293171525001, Val Loss: 0.453837478160858

In [9]:
# Grid search for a widening three-entry configuration: 25 -> 50 -> 150.
# Rebinds the notebook-level `layers` defined by the previous search cell.
layers=[
        ('dense', 25),
        ('dense', 50),
        ('dense', 150)
    ]
grid(prev_best, layers)

Layer configuration: [('dense', 25), ('dense', 50), ('dense', 150)]
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.9, 'nesterov': True}
Train Loss: 0.07463411912322045, Val Loss: 0.604947304725647
--------------------------------
New best parameters
--------------------------------
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.9, 'nesterov': False}
Train Loss: 0.05180114805698395, Val Loss: 0.7076589524745941
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.75, 'nesterov': True}
Train Loss: 0.14916541874408723, Val Loss: 0.3736623048782349
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.75, 'nesterov': False}
Train Loss: 0.14544326066970825, Val Loss: 0.3831360340118408
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.001, 'momentum': 0.9, 'nesterov': True}
Train Loss: 0.05538347512483597, Val Loss: 0.6473269104957

In [None]:
# Single training run on the hold-out split for the 25 -> 50 -> 150 configuration.
model_to_test = keras_mlp([
    ('dense', 25),
    ('dense', 50),
    ('dense', 150)
])
# Recorded grid-search result for reference:
# Best combo: {'learning_rate': 0.005, 'weight_decay': 0.0005, 'momentum': 0.9, 'nesterov': True, 'optimizer': 'SGD'}, with values: (0.05887814909219742, 0.43884475231170655)
# NOTE: `callbacks` is a notebook-level name; a later cell reuses it without redefining it.
callbacks = [
    # Halve the learning rate when val_loss has not improved for 10 epochs,
    # then wait 10 epochs (cooldown) before monitoring again.
    ReduceLROnPlateau(monitor='val_loss', mode='min', patience=10, cooldown=10, verbose=1,
                      factor=0.5,
                      min_lr=1e-7,
                      min_delta=1e-7),
    # Early stopping only starts monitoring from epoch 100, then tolerates 20 stale epochs.
    EarlyStopping(monitor='val_loss', start_from_epoch=100, patience=20,
                  min_delta=1e-7)
]
# NOTE(review): learning_rate=0.0045 and weight_decay=0.000 differ from the recorded
# best combo above (0.005 / 0.0005) — confirm this deviation is intentional.
optim = k.optimizers.SGD(learning_rate=0.0045, momentum=0.9, weight_decay=0.000, nesterov=True)
hst = keras_train(model_to_test, train_data=(X_train, y_train), val_data=(X_val, y_val), epochs=350, batch_size=50,
                  optimizer=optim, callback=callbacks)
# Second argument is passed straight to plot_keras_history; its meaning is defined there (not shown).
plot_keras_history(hst, 20)

In [10]:
# Grid search for a narrowing three-entry configuration: 150 -> 50 -> 25.
# The captured output below ends with KeyboardInterrupt, i.e. this search was stopped manually.
grid(prev_best, [
    ('dense', 150),
    ('dense', 50),
    ('dense', 25)
])

Layer configuration: [('dense', 150), ('dense', 50), ('dense', 25)]
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.9, 'nesterov': True}
Train Loss: 0.30129295587539673, Val Loss: 1.8313605546951295
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.9, 'nesterov': False}
Train Loss: 2.388154983520508, Val Loss: 6.876751327514649
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.75, 'nesterov': True}
Train Loss: 0.21715184450149536, Val Loss: 0.6317972481250763
--------------------------------
New best parameters
--------------------------------
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.75, 'nesterov': False}
Train Loss: 0.22023009955883027, Val Loss: 0.5733217179775238
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.001, 'momentum': 0.9, 'nesterov': True}
Train Loss: 0.26844403743743894, Val Loss: 1.437655329704284


KeyboardInterrupt



In [0]:
# Single training run on the hold-out split for the 150 -> 50 -> 25 configuration.
model_to_test = keras_mlp([
    ('dense', 150),
    ('dense', 50),
    ('dense', 25)
])

# NOTE(review): learning_rate=0.01 is not one of the values in the `parameters` grid — confirm it is intentional.
optim = k.optimizers.SGD(learning_rate=0.01, momentum=0.9, weight_decay=0.0001, nesterov=True)

# NOTE(review): `callbacks` is not defined in this cell; it is whatever list the most
# recently executed cell bound to that name (hidden kernel state).
hst = keras_train(model_to_test, train_data=(X_train, y_train), val_data=(X_val, y_val), epochs=150, batch_size=50,
                  optimizer=optim, callback=callbacks)

plot_keras_history(hst)


In [7]:
# Grid search for four entries of ('dense', 300).
grid(prev_best, [
        ('dense', 300),
        ('dense', 300),
        ('dense', 300),
        ('dense', 300)
    ])

Layer configuration: [('dense', 300), ('dense', 300), ('dense', 300), ('dense', 300)]
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.9, 'nesterov': True}


I0000 00:00:1705577434.773231    1015 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Train Loss: 0.041876498982310294, Val Loss: 2.6961245059967043
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.9, 'nesterov': False}
Train Loss: 0.01029393021017313, Val Loss: 1.6443861961364745
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.75, 'nesterov': True}
Train Loss: 0.025222624838352203, Val Loss: 0.3496615529060364
--------------------------------
New best parameters
--------------------------------
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.75, 'nesterov': False}
Train Loss: 0.02818046696484089, Val Loss: 0.29553282260894775
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.001, 'momentum': 0.9, 'nesterov': True}
Train Loss: 0.1607966230250895, Val Loss: 2.1112640500068665
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.001, 'momentum': 0.9, 'nesterov': False}
Train Loss: 0.00924728373065591, Val Loss: 1.6987105369567

In [None]:
# Grid search for the largest configuration tried here: 500 -> 500 -> 300 -> 300 -> 150.
grid(prev_best, [
        ('dense', 500),
        ('dense', 500),
        ('dense', 300),
        ('dense', 300),
        ('dense', 150),
    ])

Layer configuration: [('dense', 500), ('dense', 500), ('dense', 300), ('dense', 300), ('dense', 150)]
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.9, 'nesterov': True}
Train Loss: 4.006334018707276, Val Loss: 12.30111904144287
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.9, 'nesterov': False}
Train Loss: 2.2301490902900696, Val Loss: 9.267216873168945
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.75, 'nesterov': True}
Train Loss: 0.031281442008912565, Val Loss: 1.2792925357818603
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.0, 'momentum': 0.75, 'nesterov': False}
Train Loss: 0.33110701814293864, Val Loss: 2.6364132881164553
Training with parameters: {'learning_rate': 0.005, 'weight_decay': 0.001, 'momentum': 0.9, 'nesterov': True}
Train Loss: 4.559013509750367, Val Loss: 12.17245101928711
Training with parameters: {'learning_rate': 0.005, 'we

In [0]:
# Recorded grid-search result for reference (both values are far above the (1, 1) threshold):
# Best parameters: {'learning_rate': 0.005, 'weight_decay': 0.001, 'momentum': 0.9, 'nesterov': True, 'optimizer': 'SGD'}, with values: (18.03679656982422, 18.47313709259033)
# Single training run for four ('dense', 300) entries preceded by a ('bn', 1) entry
# (presumably a batch-normalisation layer — TODO confirm against keras_mlp).
model_to_test = keras_mlp([
    ('bn', 1),
    ('dense', 300),
    ('dense', 300),
    ('dense', 300),
    ('dense', 300)
])
model_to_test.summary()
# Rebinds the notebook-level `callbacks`: only LR reduction here, no early stopping.
callbacks = [
    # Multiply the learning rate by 0.25 after 10 stale epochs, with a 20-epoch cooldown.
    ReduceLROnPlateau(monitor='val_loss', mode='min', patience=10, cooldown=20, verbose=1,
                      factor=0.25,
                      min_lr=1e-7,
                      min_delta=1e-7)
]
# No batch_size is passed here, so keras_train's own default applies.
history = keras_train(model_to_test, train_data=(X_train, y_train), val_data=(X_val, y_val), epochs=150,
                      optimizer=k.optimizers.SGD(learning_rate=0.005, momentum=0.9, weight_decay=0.001, nesterov=True),
                      callback=callbacks)
plot_keras_history(history)

In [None]:
from tensorflow.keras.callbacks import LearningRateScheduler

# Model for the step-decay experiment: 350 -> 350 -> 150 -> 150.
model_to_test = keras_mlp([
    ('dense', 350),
    ('dense', 350),
    ('dense', 150),
    ('dense', 150),
])


def scheduler(epochs, lr):
    """Step-decay schedule: halve the learning rate every 100 epochs.

    Written for ``LearningRateScheduler``, which calls the schedule with the
    zero-based index of the epoch about to start and the current learning rate.

    Args:
        epochs: index of the epoch about to start (0 for the first epoch).
        lr: learning rate currently in use.

    Returns:
        ``lr * 0.5`` at epochs 100, 200, 300, ...; ``lr`` unchanged otherwise.
    """
    # Epoch 0 also satisfies ``epochs % 100 == 0``. Skip it, otherwise the
    # optimizer would start from half of its configured learning rate.
    if epochs > 0 and epochs % 100 == 0:
        return lr * 0.5
    return lr


# Rebinds the notebook-level `callbacks`: only the epoch-based schedule defined above in this cell.
callbacks = [
    LearningRateScheduler(scheduler)
]
# Recorded grid-search result for reference:
# Best combo: {'learning_rate': 0.001, 'weight_decay': 0.0001, 'momentum': 0.9, 'nesterov': True, 'optimizer': 'SGD'}, with values: (0.0807236298918724, 0.35102823972702024)

# Optimizer settings match the recorded best combo.
optim = k.optimizers.SGD(learning_rate=0.001, momentum=0.9, weight_decay=0.0001, nesterov=True)
# No batch_size is passed here, so keras_train's own default applies.
hst = keras_train(model_to_test, train_data=(X_train, y_train), val_data=(X_val, y_val), epochs=350,
                  optimizer=optim, callback=callbacks)

In [None]:
# Plot the history of the most recent run bound to `hst`.
# The second argument's meaning is defined by plot_keras_history (not shown here).
plot_keras_history(hst, 30)