In [None]:
from datetime import datetime

import numpy as np
from sklearn.preprocessing import StandardScaler

import dataset_utils as dataset
import torch as t

from ML_cup.pytorch import *

In [None]:
import os

# Select the compute device: make CUDA the default when it is available,
# otherwise stay on the CPU.
if torch.cuda.is_available():
    print("GPU is available")
    torch.set_default_device(torch.device("cuda"))
    # current_device() is only queried inside this branch: calling it on a
    # machine without CUDA raises instead of returning a value.
    print(f"Using device: {torch.cuda.current_device()}")
else:
    print("Using device: cpu")
print(f"Available cpu count: {os.cpu_count()}")

In [None]:
# Read the two ML-CUP24 CSV files through the project's dataset helper:
# "TR" is kept as the development set, "TS" as the blind set.
dev_data = dataset.load_dataset(
    "../data/ML-CUP24-TR.csv",
)
blind_data = dataset.load_dataset(
    "../data/ML-CUP24-TS.csv",
)

In [None]:
import time

# Derive the seed from the wall clock so every session explores a different
# split/initialisation, and print it so a given run can be replayed later.
rand = int(time.time())
print(f"Random seed: {rand}")

# Seed every RNG this notebook relies on, not only NumPy's: torch's global
# generator drives the weight initialisation of the models built below.
np.random.seed(rand)
t.manual_seed(rand)

In [None]:
# Rescale the development set; the call also returns the feature scaler and
# the target scaler. The feature scaler is then reused for the blind set.
# NOTE(review): both names are overwritten with their rescaled versions, so
# re-running this cell without reloading the data presumably rescales twice.
dev_data, X_scaler, y_scaler = dataset.rescale_dataset(dev_data)
blind_data = dataset.rescale_dataset(blind_data, X_scaler)

# 3D scatter of the development targets (the last 3 columns of the data).
y = np.array(dev_data)[:, -3:]
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(y[:, 0], y[:, 1], y[:, 2])
# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set_title("Development-set targets (rescaled)")
ax.set_xlabel("target 1")
ax.set_ylabel("target 2")
ax.set_zlabel("target 3")
plt.show()

# Model Definitions

In [None]:
def _build_mlp(hidden_sizes, activation, in_features=12, out_features=3) -> t.nn.Module:
    """Assemble a fully connected network as a ``t.nn.Sequential``.

    Every hidden layer is a ``Linear`` followed by a fresh ``activation()``
    instance; the output layer is a bare ``Linear`` with no activation.

    Args:
        hidden_sizes: width of each hidden layer, in order.
        activation: zero-argument callable returning an activation module
            (e.g. ``t.nn.Tanh``).
        in_features: size of the input vector (12 for every model here).
        out_features: size of the output vector (3 for every model here).

    Returns:
        The assembled ``t.nn.Sequential``.
    """
    layers = []
    fan_in = in_features
    # Linear layers are created strictly front to back so the random
    # initialisation consumes the RNG in the same order as a literal
    # Sequential(Linear, act, Linear, act, ..., Linear) would.
    for width in hidden_sizes:
        layers.append(t.nn.Linear(fan_in, width))
        layers.append(activation())
        fan_in = width
    layers.append(t.nn.Linear(fan_in, out_features))
    return t.nn.Sequential(*layers)


def MLP() -> t.nn.Module:
    """v1: two hidden layers of 200 units, Tanh activations."""
    return _build_mlp((200, 200), t.nn.Tanh)


def MLPr() -> t.nn.Module:
    """v1 with ReLU activations instead of Tanh."""
    return _build_mlp((200, 200), t.nn.ReLU)


# v2, same depth, more neurons per layer. 300 neurons per layer
# Test with more than this amount didn't show any improvement
def MLPv2r() -> t.nn.Module:
    """v2: two hidden layers of 300 units, ReLU activations."""
    return _build_mlp((300, 300), t.nn.ReLU)


def MLPv2() -> t.nn.Module:
    """v2: two hidden layers of 300 units, Tanh activations."""
    return _build_mlp((300, 300), t.nn.Tanh)


# v3, more depth but a different structure. Increasing number of neurons at each layer, didn't show any improvement
def MLPv3() -> t.nn.Module:
    """v3: three hidden layers widening 100 -> 200 -> 300, Tanh activations."""
    return _build_mlp((100, 200, 300), t.nn.Tanh)


def MLPv3r() -> t.nn.Module:
    """v3: three hidden layers widening 100 -> 200 -> 300, ReLU activations."""
    return _build_mlp((100, 200, 300), t.nn.ReLU)


# v4, same depth as v2, but less neurons per layer. 50 neurons per layer
def MLPv4() -> t.nn.Module:
    """v4: two hidden layers of 50 units, Tanh activations."""
    return _build_mlp((50, 50), t.nn.Tanh)


def MLPv4r() -> t.nn.Module:
    """v4: two hidden layers of 50 units, ReLU activations."""
    return _build_mlp((50, 50), t.nn.ReLU)

# Grid search for the best parameters

# FullBatch

In [None]:
# Split the dev data into train and validation with 5-fold cross validation.
# batch_size=0 presumably selects full-batch loaders (this is the FullBatch
# section) - confirm against dataset.torch_k_fold.
train_loaders, val_loaders, dev_loader, test_loader = dataset.torch_k_fold(dataset=dev_data, folds=5, batch_size=0,
                                                                           random_state=rand)
# blind test
from ML_cup.dataset_utils import CupDataset

# Use CUDA only when it is actually available, mirroring the device check at
# the top of the notebook, instead of hard-coding 'cuda'.
# The whole blind set is served as one batch.
blind_loader = DataLoader(
    CupDataset(blind_data, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')),
    batch_size=len(blind_data),
)

# Model v1
For this model we split the hyperparameter grid into two groups, one for the SGD optimizer and one for the Adam optimizer, and run each group in its own cell so that the results are easier to inspect in TensorBoard.

In [None]:
# Model v1 search space, kept as two separate grids (one per optimizer) so
# each can be searched from its own cell.
parameters_SGD = [
    dict(
        optimizer='SGD',
        lr=np.linspace(0.0005, 0.0001, 5),
        weight_decay=[0.001, 0.0025, 0.005],
        momentum=[0.8, 0.9, 0.95],
        nesterov=[True, False],
    ),
]
parameters_Adam = [
    dict(
        optimizer='Adam',
        lr=np.linspace(0.0002, 0.0001, 10),
        weight_decay=np.linspace(0.0001, 0.0005, 5),
    ),
]

## SGD

In [None]:
# Grid search over the SGD grid for Model v1 (MLP) on the k-fold loaders.
# The call yields the selected parameters and a model.
best_pytorch_params_s, model = grid_search(
    # what to search
    model_builder=MLP,
    parameters=parameters_SGD,
    # data
    train_loader=train_loaders,
    val_loader=val_loaders,
    y_scaler=y_scaler,
    # training budget / stopping settings
    max_epochs=1000,
    patience=5,
    stability_threshold=1e-6,
    clip=1.0,
    scheduler=('', {}),
    # reproducibility and logging
    random_seed=rand,
    tensorboard_folder_base=f'runs/fullbatch/MLP/scaled/SGD/tanh/{rand}',
)

## Adam

In [None]:
# Grid search over the Adam grid for Model v1 (MLP) on the k-fold loaders.
# The call yields the selected parameters and a model.
best_pytorch_params_a, model = grid_search(
    # what to search
    model_builder=MLP,
    parameters=parameters_Adam,
    # data
    train_loader=train_loaders,
    val_loader=val_loaders,
    y_scaler=y_scaler,
    # training budget / stopping settings
    max_epochs=1000,
    patience=5,
    stability_threshold=1e-5,
    clip=1.0,
    scheduler=('', {}),
    # reproducibility and logging
    random_seed=rand,
    tensorboard_folder_base=f'runs/fullbatch/MLP/scaled/Adam/tanh/{rand}',
)

# Model v2

In [None]:
# Combined SGD + Adam search space, reused by the full-batch searches for
# Model v2, v3 and v4 below.
parameters = [
    dict(
        optimizer='SGD',
        lr=np.linspace(0.005, 0.001, 5),
        weight_decay=[0.001, 0.005, 0.0015, 0.0025],
        momentum=[0.8, 0.9, 0.95],
        nesterov=[True, False],
    ),
    dict(
        optimizer='Adam',
        lr=np.linspace(0.0005, 0.0001, 5),
        weight_decay=[0.0, 0.001, 0.0005],
    ),
]

In [None]:
# Grid search for Model v2 with Tanh activations (MLPv2), then print the
# selected parameters.
best_pytorch_params, model = grid_search(
    # what to search
    model_builder=MLPv2,
    parameters=parameters,
    # data
    train_loader=train_loaders,
    val_loader=val_loaders,
    y_scaler=y_scaler,
    # training budget / stopping settings
    max_epochs=1500,
    patience=20,
    stability_threshold=2e-4,
    clip=1.0,
    scheduler=('', {}),
    # reproducibility and logging
    random_seed=rand,
    tensorboard_folder_base=f'runs/fullbatch/MLP2/scaled/tanh/{rand}',
)
print(best_pytorch_params)


In [None]:
# Grid search for Model v2 with ReLU activations (MLPv2r), then print the
# selected parameters.
best_pytorch_params, model = grid_search(
    # what to search
    model_builder=MLPv2r,
    parameters=parameters,
    # data
    train_loader=train_loaders,
    val_loader=val_loaders,
    y_scaler=y_scaler,
    # training budget / stopping settings
    max_epochs=1500,
    patience=20,
    stability_threshold=2e-4,
    clip=1.0,
    scheduler=('', {}),
    # reproducibility and logging
    random_seed=rand,
    tensorboard_folder_base=f'runs/fullbatch/MLP2r/scaled/relu/{rand}',
)
print(best_pytorch_params)

# Model v3

In [None]:
# Grid search for Model v3 (MLPv3), then print the selected parameters.
best_pytorch_params, model = grid_search(
    # what to search
    model_builder=MLPv3,
    parameters=parameters,
    # data
    train_loader=train_loaders,
    val_loader=val_loaders,
    y_scaler=y_scaler,
    # training budget / stopping settings
    max_epochs=1500,
    patience=20,
    stability_threshold=5e-4,
    clip=1.0,
    scheduler=('', {}),
    # reproducibility and logging
    random_seed=rand,
    tensorboard_folder_base=f'runs/fullbatch/MLP3/scaled/tanh/{rand}',
)
print(best_pytorch_params)

# Model v4

In [None]:
# Grid search for Model v4 (MLPv4). The result is not printed here.
best_pytorch_params, model = grid_search(
    # what to search
    model_builder=MLPv4,
    parameters=parameters,
    # data
    train_loader=train_loaders,
    val_loader=val_loaders,
    y_scaler=y_scaler,
    # training budget / stopping settings
    max_epochs=1500,
    patience=20,
    stability_threshold=0.1,
    clip=1.0,
    scheduler=('', {}),
    # reproducibility and logging
    random_seed=rand,
    tensorboard_folder_base=f'runs/fullbatch/MLP4/scaled/{rand}',
)

# Minibatch

In [None]:
# Split the dev data into train and validation with 5-fold cross validation,
# this time with batch_size=20 for the mini-batch experiments.
train_loaders, val_loaders, dev_loader, test_loader = dataset.torch_k_fold(dataset=dev_data, folds=5, batch_size=20,
                                                                           random_state=rand)
# blind test
from ML_cup.dataset_utils import CupDataset

# Use CUDA only when it is actually available, mirroring the device check at
# the top of the notebook, instead of hard-coding 'cuda'.
# The blind set is served as one batch, like in the full-batch section,
# rather than falling back to DataLoader's default batch_size of 1.
blind_loader = DataLoader(
    CupDataset(blind_data, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')),
    batch_size=len(blind_data),
)

# Model v1
For this model we split the hyperparameter grid into two groups, one for the SGD optimizer and one for the Adam optimizer, and run each group in its own cell so that the results are easier to inspect in TensorBoard.

## SGD

In [None]:
# SGD-only search space for the mini-batch run of Model v1.
parameters = [
    dict(
        optimizer='SGD',
        lr=np.linspace(0.0075, 0.000075, 5),
        weight_decay=np.linspace(0.01, 0.00001, 5),
        momentum=[0.9, 0.95, 0.8],
        nesterov=[True, False],
    ),
]
# Grid search over the SGD grid for Model v1 (MLP) on the mini-batch loaders.
best_pytorch_params, best_model = grid_search(
    # what to search
    model_builder=MLP,
    parameters=parameters,
    # data
    train_loader=train_loaders,
    val_loader=val_loaders,
    y_scaler=y_scaler,
    # training budget / stopping settings
    max_epochs=1500,
    patience=10,
    stability_threshold=0.25,
    clip=1.0,
    scheduler=('', {}),
    # reproducibility and logging
    random_seed=rand,
    tensorboard_folder_base=f'runs/minibatch/MLP/SGD/scaled/tanh/{rand}',
)

## Adam

In [None]:
# Adam-only search space for the mini-batch run of Model v1.
parameters = [
    dict(
        optimizer='Adam',
        lr=np.linspace(0.0005, 0.0001, 10),
        weight_decay=np.linspace(0.001, 0.00001, 10),
    ),
]

# Grid search over the Adam grid for Model v1 (MLP) on the mini-batch loaders.
best_pytorch_params, best_model = grid_search(
    # what to search
    model_builder=MLP,
    parameters=parameters,
    # data
    train_loader=train_loaders,
    val_loader=val_loaders,
    y_scaler=y_scaler,
    # training budget / stopping settings
    max_epochs=1500,
    patience=10,
    stability_threshold=0.25,
    clip=1.0,
    scheduler=('', {}),
    # reproducibility and logging
    random_seed=rand,
    tensorboard_folder_base=f'runs/minibatch/MLP/Adam/scaled/tanh/{rand}',
)

# Report the selected parameters. No retraining happens in this cell:
# model_v1 is simply the model returned by the grid search above.
print(best_pytorch_params)
model_v1 = best_model

# Evaluate that model on the held-out test loader.
print("Evaluating the model on the test set")
print(torch_predict(model_v1, test_loader))

# Run the model on the blind set via the project's blind_test helper.
blind_test(model_v1, blind_loader, rand, y_scaler=y_scaler)

# Model v2

## ReLU

In [None]:
# SGD + Adam search space for the mini-batch run of Model v2 (ReLU variant).
parameters = [
    dict(
        optimizer='SGD',
        lr=np.linspace(0.0025, 0.0001, 5),
        weight_decay=[0.001, 0.005, 0.0015, 0.0025],
        momentum=[0.9, 0.95],
        nesterov=[True],
    ),
    dict(
        optimizer='Adam',
        lr=np.linspace(0.0003, 0.000075, 10),
        weight_decay=np.linspace(0.0001, 0.0005, 10),
    ),
]

# Grid search for Model v2 with ReLU activations (MLPv2r) on the mini-batch
# loaders, then print both the selected parameters and the returned model.
best_pytorch_params, best_model = grid_search(
    # what to search
    model_builder=MLPv2r,
    parameters=parameters,
    # data
    train_loader=train_loaders,
    val_loader=val_loaders,
    y_scaler=y_scaler,
    # training budget / stopping settings
    max_epochs=1500,
    patience=10,
    stability_threshold=0.25,
    clip=1.0,
    scheduler=('', {}),
    # reproducibility and logging
    random_seed=rand,
    tensorboard_folder_base=f'runs/minibatch/MLP2/scaled/relu/{rand}',
)
print(best_pytorch_params, best_model)

## Tanh

In [None]:
# SGD + Adam search space for the mini-batch run of Model v2 (Tanh variant).
# The values are the same as those used for the ReLU variant.
parameters = [
    dict(
        optimizer='SGD',
        lr=np.linspace(0.0025, 0.0001, 5),
        weight_decay=[0.001, 0.005, 0.0015, 0.0025],
        momentum=[0.9, 0.95],
        nesterov=[True],
    ),
    dict(
        optimizer='Adam',
        lr=np.linspace(0.0003, 0.000075, 10),
        weight_decay=np.linspace(0.0001, 0.0005, 10),
    ),
]

# Grid search for Model v2 with Tanh activations (MLPv2) on the mini-batch
# loaders, then print the selected parameters.
best_pytorch_params, best_model = grid_search(
    # what to search
    model_builder=MLPv2,
    parameters=parameters,
    # data
    train_loader=train_loaders,
    val_loader=val_loaders,
    y_scaler=y_scaler,
    # training budget / stopping settings
    max_epochs=1500,
    patience=20,
    stability_threshold=0.2,
    clip=1.0,
    scheduler=('', {}),
    # reproducibility and logging
    random_seed=rand,
    tensorboard_folder_base=f'runs/minibatch/MLP2/scaled/tanh/{rand}',
)
print(best_pytorch_params)

# Model v3

In [None]:
# Model v3 search space, kept as two separate grids (one per optimizer) so
# each can be searched from its own cell.
parameters_sgd3 = [
    dict(
        optimizer='SGD',
        lr=np.linspace(0.00075, 0.0001, 5),
        weight_decay=[0.001, 0.0005, 0.0015, 0.0025],
        momentum=[0.8, 0.9],
        nesterov=[True],
    ),
]
parameters_adam3 = [
    dict(
        optimizer='Adam',
        lr=np.linspace(0.000025, 0.0000075, 5),
        weight_decay=np.linspace(0.0001, 0.00005, 10),
    ),
]

In [None]:
# Grid search over the SGD grid for Model v3 (MLPv3) on the mini-batch loaders.
best_pytorch_params, model_v3 = grid_search(
    # what to search
    model_builder=MLPv3,
    parameters=parameters_sgd3,
    # data
    train_loader=train_loaders,
    val_loader=val_loaders,
    y_scaler=y_scaler,
    # training budget / stopping settings
    max_epochs=1500,
    patience=20,
    stability_threshold=5e-5,
    clip=1.0,
    scheduler=('', {}),
    # reproducibility and logging
    random_seed=rand,
    tensorboard_folder_base=f'runs/minibatch/MLP3/scaled/SGD/{rand}',
)
# No retraining happens here; the selected parameters are only printed.
# Kept for reference (not applied), a manual override that was left disabled:
# best_pytorch_params = {'lr': 0.001, 'weight_decay': 0.005, 'momentum': 0.9, 'nesterov': True, 'optimizer': 'SGD'}
print(best_pytorch_params)

In [None]:
# Grid search over the Adam grid for Model v3 (MLPv3) on the mini-batch loaders.
# Note: this overwrites best_pytorch_params and model_v3 from the SGD search.
best_pytorch_params, model_v3 = grid_search(
    # what to search
    model_builder=MLPv3,
    parameters=parameters_adam3,
    # data
    train_loader=train_loaders,
    val_loader=val_loaders,
    y_scaler=y_scaler,
    # training budget / stopping settings
    max_epochs=1500,
    patience=20,
    stability_threshold=5e-5,
    clip=1.0,
    scheduler=('', {}),
    # reproducibility and logging
    random_seed=rand,
    tensorboard_folder_base=f'runs/minibatch/MLP3/scaled/Adam/{rand}',
)
# No retraining happens here; the selected parameters are only printed.
print(best_pytorch_params)

# Model v4

In [None]:
# Grid search for Model v4 (MLPv4) on the mini-batch loaders.
# NOTE(review): `parameters` is not defined in this section; the search uses
# whichever cell assigned it last (the Model v2 Tanh grid when the notebook
# is run top to bottom) - confirm this is the intended search space.
best_pytorch_params, model = grid_search(
    # what to search
    model_builder=MLPv4,
    parameters=parameters,
    # data
    train_loader=train_loaders,
    val_loader=val_loaders,
    y_scaler=y_scaler,
    # training budget / stopping settings
    max_epochs=1500,
    patience=20,
    stability_threshold=0.1,
    clip=1.0,
    scheduler=('', {}),
    # reproducibility and logging
    random_seed=rand,
    tensorboard_folder_base=f'runs/minibatch/MLP4/scaled/{rand}',
)