In [None]:
import numpy as np
import random
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import train_test_split

# Add the parent folder to the import path so that, from now on, the modules
# located there can be imported from this notebook
import sys
sys.path.append('../')

from layer import HiddenLayer
from activations import *
from losses import *
from network import Network
from regularizers import *
from utils import *
from metrics import *
from validation import *
from gridsearch import grid_search

random.seed(42)
np.random.seed(42)

# Progress bar
from tqdm import tqdm
import time

### Reading cup data and splitting training and test set

In [None]:
# Read the CUP data from the two CSV files. The file names suggest TR = labelled
# training data and TS = blind test data — confirm against read_cup's definition.
X, y, blind_test = read_cup("../data/ML-CUP23-TR.csv", "../data/ML-CUP23-TS.csv")

In [None]:
# Hold out 20% of the samples as an internal test set.
# NOTE(review): no random_state is passed, so the split is drawn from NumPy's
# global RNG (seeded in the first cell); re-running this cell on its own gives a
# different split. Consider passing an explicit random_state.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

### first Grid Search
Each entry in the dictionary is a different hyperparameter; the associated list contains the values that we want to test for it.

In [None]:
# Search space of the first grid search: one key per hyperparameter, each mapped
# to the list of candidate values to try.
# NOTE(review): "L2" appears twice under 'regularizers'. Whether that entry is
# paired position-by-position with 'regularizers_lambda' or expanded as a full
# product is decided inside grid_search — confirm no configuration is run twice.
hyperparameters = dict(
    layers_sizes=[[512, 128, 3]],
    activations=["ReLU"],
    startings=["Range_random", "Fan_in"],
    startings_range=[(-0.1, 0.1), (-0.5, 0.5)],
    regularizers=["L2", "L2"],
    regularizers_lambda=[1e-6, 1e-8],
    momentums=[
        ('Nesterov', 0.7),
        ('Nesterov', 0.9),
        ('Standard', 0.7),
        ('Standard', 0.9),
    ],
    etas=[1e-4, 1e-6],
)

# Run the search on the training split only (k=5), with MSE as loss and MEE as
# metric; the returned rows are saved to CSV by the next cell.
data_to_csv = grid_search(
    hyperparams=hyperparameters, X=X_train, y=y_train, k=5,
    metric=MEE(), loss=MSE(1), seed=6, verbose=True,
)

The following cell appends the grid search results to the specified CSV file.

In [None]:
import csv

# Append (mode 'a') every grid-search result row to the statistics file;
# newline='' lets the csv module control the line endings itself.
csv_file_path = '../statistics/test.csv'
with open(csv_file_path, mode='a', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(data_to_csv)

### Checking the best net

Once we have decided on the hyperparameters, we can perform a second, finer search by changing each layer's specific parameters.

In [None]:
# Fix the NumPy seed before building the network so the run is repeatable
np.random.seed(6)
net = Network(MSE(1), eta=5e-4, tau=500, cyclic=True)

# One tuple per layer, in order:
# (input size, units, activation class, initializer class, init bound, momentum type)
# The initializer is built with low=-bound and high=+bound.
layer_specs = (
    (len(X_train[0]), 512, ReLU, Fan_in, 0.5, 'Nesterov'),
    (512, 128, ReLU, Fan_in, 0.5, 'Standard'),
    (128, 3, Identity, Range_random, 0.05, 'Nesterov'),
)

# Every layer shares the same L2 regularizer and the same momentum coefficient
for n_inputs, n_units, activation_cls, init_cls, init_bound, momentum_type in layer_specs:
    net.add_layer(
        input_size=n_inputs,
        units_size=n_units,
        activation=activation_cls(),
        starting=init_cls(low=-init_bound, high=init_bound),
        regularizer=L2(lambda_=1e-8),
        momentum=(momentum_type, 0.99),
    )

We can also choose the validation parameters, namely the number of folds, max epochs, patience and threshold.

In [None]:
# Cross-validate `net` on the training split with k=5 folds, using MEE as metric,
# at most 10000 epochs and patience=500.
# NOTE(review): no threshold argument is passed here — presumably the default of
# kfold_crossval applies; confirm against its definition.
data_to_csv = kfold_crossval(
    X=X_train,
    y=y_train,
    k=5,
    net=net,
    metric=MEE(),
    epochs=10000,
    patience=500,
    verbose=True
)

# Show the collected results; the next cell appends them to the CSV file
print(data_to_csv)

The following cell writes the net, layer by layer, and the corresponding performance to the CSV file.

In [None]:
import csv

csv_file_path = '../statistics/test.csv'
with open(csv_file_path, mode='a', newline='') as file:
    writer = csv.writer(file)

    # First row: a single cell with the network-level description
    net_csv = [net.to_csv()]
    writer.writerow(net_csv)

    # Second row: one cell per layer, in network order
    layers_csv = [layer.to_csv() for layer in net.layers]
    writer.writerow(layers_csv)

    # Remaining rows: the cross-validation results computed above
    writer.writerows(data_to_csv)

# Let's test on the test set! (NOT the blind one)
- k-fold cv (train and val set) gridsearch --> in the cells above
- train the best model on the whole training set
- test on the test set

In [None]:
# Set the hyperparameters of the model that won the grid search
# NOTE(review): this 64-64-3 architecture differs from the 512-128-3 one listed
# in the grid search above — confirm which one is the intended winner.
np.random.seed(6)

best_net = Network(MSE(1), eta=5e-4)

# (input size, units, activation class) for each layer, in order; every layer
# shares the same initializer, L2 regularizer and Nesterov momentum.
for n_inputs, n_units, activation_cls in (
    (len(X_train[0]), 64, ReLU),
    (64, 64, ReLU),
    (64, 3, Identity),
):
    best_net.add_layer(
        input_size=n_inputs,
        units_size=n_units,
        starting=Fan_in(),
        activation=activation_cls(),
        regularizer=L2(lambda_=1e-8),
        momentum=('Nesterov', 0.9),
    )

We have the best net, let's train it on the whole training set. BUT when should we stop? We try 2 things:
- early stopping using the whole training set as validation set
- the max number of epochs that the best model needed in the k-fold cv

1. Here we train the best model on the whole training set, using early stopping that monitors the whole training set itself

In [None]:
# Train the best model with patience=250; the training set is deliberately passed
# as validation data as well, so early stopping monitors the training set.
info = best_net.train(
    X_train=X_train, y_train=y_train,
    X_val=X_train, y_val=y_train,
    epochs=10000, metric=MEE(), patience=250,
)

In [None]:
# Print the last recorded loss and metric on the training set (= validation set)
print("Train Loss: ", info['tr_losses'][-1])
print("Train Metric: ", info['tr_metrics'][-1])

# Plot the train (and val) curves side by side: loss on the left, metric on the
# right. Titles and axis labels are set so the figure can be read on its own.
# NOTE(review): the x axis is labelled "Epoch" assuming train() records one value
# per epoch — confirm against Network.train.
fig, (loss_ax, metric_ax) = plt.subplots(1, 2, figsize=(10, 5))

loss_ax.plot(info['tr_losses'], label='Train Loss')
loss_ax.plot(info['val_losses'], label='Val Loss')
loss_ax.set(title='Loss', xlabel='Epoch', ylabel='Loss (MSE)')
loss_ax.legend()

metric_ax.plot(info['tr_metrics'], label='Train Metric')
metric_ax.plot(info['val_metrics'], label='Val Metric')
metric_ax.set(title='Metric', xlabel='Epoch', ylabel='Metric (MEE)')
metric_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Collect the model output for every test sample; forward() is called on one
# sample at a time and each output is stored as a (3, 1) entry.
y_pred = np.zeros((len(X_test), 3, 1))

for idx, sample in enumerate(X_test):
    y_pred[idx] = best_net.forward(sample)

# Metric (MEE) of the predictions on the internal test set
print("Test Metric: ", MEE()(y_test, y_pred))

2. Let's train the best model on the whole training set using the max number of epochs that the best model needed in the k-fold cv

In [None]:
# !!! RESET THE BEST NET !!!
# Rebuild the same network from scratch, with the same seed, so that this second
# experiment does not start from the weights trained above.
np.random.seed(6)

best_net = Network(MSE(1), eta=5e-4)

# (input size, units, activation class) for each layer, in order; every layer
# shares the same initializer, L2 regularizer and Nesterov momentum.
for n_inputs, n_units, activation_cls in (
    (len(X_train[0]), 64, ReLU),
    (64, 64, ReLU),
    (64, 3, Identity),
):
    best_net.add_layer(
        input_size=n_inputs,
        units_size=n_units,
        starting=Fan_in(),
        activation=activation_cls(),
        regularizer=L2(lambda_=1e-8),
        momentum=('Nesterov', 0.9),
    )

In [None]:
# Train for a fixed number of epochs (no patience argument is passed). 1465 is a
# hard-coded value: per the text above, the max number of epochs the best model
# needed in the k-fold cv. The training set is also passed as validation data.
info = best_net.train(
    X_train=X_train, y_train=y_train,
    X_val=X_train, y_val=y_train,
    epochs=1465, metric=MEE(),
)

In [None]:
# Print the last recorded loss and metric on the training set (= validation set)
print("Train Loss: ", info['tr_losses'][-1])
print("Train Metric: ", info['tr_metrics'][-1])

# Plot the train (and val) curves side by side: loss on the left, metric on the
# right. Titles and axis labels are set so the figure can be read on its own.
# NOTE(review): the x axis is labelled "Epoch" assuming train() records one value
# per epoch — confirm against Network.train.
fig, (loss_ax, metric_ax) = plt.subplots(1, 2, figsize=(10, 5))

loss_ax.plot(info['tr_losses'], label='Train Loss')
loss_ax.plot(info['val_losses'], label='Val Loss')
loss_ax.set(title='Loss', xlabel='Epoch', ylabel='Loss (MSE)')
loss_ax.legend()

metric_ax.plot(info['tr_metrics'], label='Train Metric')
metric_ax.plot(info['val_metrics'], label='Val Metric')
metric_ax.set(title='Metric', xlabel='Epoch', ylabel='Metric (MEE)')
metric_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Collect the model output for every test sample; forward() is called on one
# sample at a time and each output is stored as a (3, 1) entry.
y_pred = np.zeros((len(X_test), 3, 1))

for idx, sample in enumerate(X_test):
    y_pred[idx] = best_net.forward(sample)

# Metric (MEE) of the predictions on the internal test set
print("Test Metric: ", MEE()(y_test, y_pred))

## We retrain the best nn on the whole training set (80%)

In [None]:
# Fix the NumPy seed before building the network so the run is repeatable
np.random.seed(6)

net = Network(MSE(1), eta=5e-4, tau=1000, cyclic=False)

# One tuple per layer, in order:
# (input size, units, activation class, initializer class, init bound, momentum type)
# The initializer is built with low=-bound and high=+bound.
layer_specs = (
    (len(X_train[0]), 512, ReLU, Fan_in, 0.5, 'Nesterov'),
    (512, 128, ReLU, Fan_in, 0.5, 'Standard'),
    (128, 3, Identity, Range_random, 0.05, 'Nesterov'),
)

# Every layer shares the same L2 regularizer and the same momentum coefficient
for n_inputs, n_units, activation_cls, init_cls, init_bound, momentum_type in layer_specs:
    net.add_layer(
        input_size=n_inputs,
        units_size=n_units,
        activation=activation_cls(),
        starting=init_cls(low=-init_bound, high=init_bound),
        regularizer=L2(lambda_=1e-8),
        momentum=(momentum_type, 0.99),
    )

# Retrain on the whole training split (also passed as validation data).
# NOTE(review): final_retrain / final_tr_loss presumably stop training once the
# training loss reaches 0.0121 — confirm against Network.train.
info = net.train(
    X_train=X_train, y_train=y_train,
    X_val=X_train, y_val=y_train,
    epochs=10000, metric=MEE(),
    final_retrain=True, final_tr_loss=0.0121,
)

In [None]:
# Print what net.statistics returns for the internal test split with the MEE metric
print(net.statistics(X_test, y_test, MEE()))

## Final retrain on all the data and calculating the output


In [None]:
# Seed NumPy's global RNG exactly like every other network-building cell in this
# notebook does. Without it, the initial weights of the final model depend on
# whatever RNG state the previously executed cells left behind, so the submitted
# model could not be reproduced.
np.random.seed(6)

net = Network(MSE(1), eta=5e-4, tau=1000, cyclic=False)

# The input size is taken from X, the data this network is actually trained on
# (it has the same number of features as the X_train split derived from it).
net.add_layer(
    input_size=len(X[0]),
    units_size=512,
    activation=ReLU(),
    starting=Fan_in(low=-0.5, high=0.5),
    regularizer=L2(lambda_=1e-8),
    momentum=('Nesterov', 0.99)
)

net.add_layer(
    input_size=512,
    units_size=128,
    activation=ReLU(),
    starting=Fan_in(low=-0.5, high=0.5),
    regularizer=L2(lambda_=1e-8),
    momentum=('Standard', 0.99)
)

# Linear output layer with 3 units
net.add_layer(
    input_size=128,
    units_size=3,
    activation=Identity(),
    starting=Range_random(low=-0.05, high=0.05),
    regularizer=L2(lambda_=1e-8),
    momentum=('Nesterov', 0.99)
)

# Final retrain on ALL the labelled data (training + internal test split); the
# same data is passed as validation data.
# NOTE(review): final_retrain / final_tr_loss presumably stop training once the
# training loss reaches the given value — confirm against Network.train. The
# value here (0.01211) differs slightly from the 0.0121 used in the retrain on
# the 80% split; confirm which one is intended.
info = net.train(
    X_train=X,
    y_train=y,
    X_val=X,
    y_val=y,
    epochs=10000,
    metric=MEE(),
    final_retrain=True,
    final_tr_loss=0.01211, # Mean loss over kfold CV
)

In [None]:
# Destination folder of the results file.
# NOTE: the output file has the same name as the blind-test input file read from
# ../data — do not point folder_path at that folder or the input is overwritten.
folder_path = '..'

# Forward every blind-test sample through the final network, one at a time
outputs = [net.forward(x) for x in blind_test]

with open(f'{folder_path}/ML-CUP23-TS.csv', 'w') as file:
    # Header: comment lines with team members, team name, dataset and date
    file.write('# Irene Dovichi, Marco Lavorini, Ilaria Ritelli\n')
    file.write('# BunnyProp\n')
    file.write('# ML-CUP23\n')
    file.write('# 30/01/2024\n')  # Change if needed

    # One line per sample: 1-based id followed by the three output values.
    # Each output is indexed as output[k][0], i.e. it is a column of 3 values.
    for index, output in enumerate(outputs, start=1):
        file.write(f'{index},{output[0][0]},{output[1][0]},{output[2][0]}\n')
