In [1]:
import os
import warnings

# to ignore pandas warning
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from utils import *
# Kept after the star import, as in the original order, so that a same-named
# export from utils cannot shadow the module binding.
import NeuralNetwork

The training set is split into 70% for 5-fold cross-validation of the models, 10% for early stopping, and 20% as an internal test set.

In [3]:
col_names = ('Input1', 'Input2', 'Input3', 'Input4', 'Input5', 'Input6',
             'Input7', 'Input8', 'Input9', 'Input10', 'TARGET_x', 'TARGET_y', 'TARGET_z')

# Cached split files: once written, every later run reuses the same partition.
_SPLIT_FILES = {
    "train": "train_split.csv",
    "test": "test_split.csv",
    "es": "es_split.csv",
}
# Seed for the shuffle, so the split can be regenerated identically if the cache is deleted.
_SPLIT_SEED = 42


def _read_cup_csv(path, skiprows):
    """Read columns 1..13 of a CSV file into a DataFrame labelled with `col_names`.

    Column 0 is never loaded. In the cached split files it is the index written
    by `DataFrame.to_csv`; in the raw dataset it is presumably a row id (TODO confirm).

    Parameters
    ----------
    path : str
        CSV file to read.
    skiprows : int
        Number of leading lines to skip before the data starts.
    """
    return pd.read_csv(path, skiprows=skiprows, usecols=list(range(1, 14)), names=col_names)


# Reuse the cache only when ALL three files exist; a partial cache is regenerated
# instead of failing halfway through the reads.
if all(os.path.isfile(path) for path in _SPLIT_FILES.values()):
    # skiprows=1 drops the header line written by to_csv
    tr_data = _read_cup_csv(_SPLIT_FILES["train"], skiprows=1)
    test_data = _read_cup_csv(_SPLIT_FILES["test"], skiprows=1)
    es_data = _read_cup_csv(_SPLIT_FILES["es"], skiprows=1)
else:
    # skiprows=7 skips the leading lines of the raw file (presumably its comment header)
    data = _read_cup_csv("../ML-23-PRJ-Package/ML-CUP23-TR.csv", skiprows=7)
    data = data.sample(frac=1, random_state=_SPLIT_SEED)
    # SPLIT 70/10/20: ten folds -> folds 0-1 internal test (20%),
    # folds 2-8 training / cross-validation (70%), fold 9 early stopping (10%)
    folds = np.array_split(data, 10)
    tr_data = pd.concat(folds[2:9])
    test_data = pd.concat(folds[0:2])
    es_data = folds[9]
    tr_data.to_csv(_SPLIT_FILES["train"])
    test_data.to_csv(_SPLIT_FILES["test"])
    es_data.to_csv(_SPLIT_FILES["es"])



## Gridsearch
We will test some hyperparameter combinations in order to get the best model.
<p> Every model is trained with 5-fold cross-validation on a 70% split of the original training set (a further 10% is used for early stopping); the MSE on the validation fold (within the 5-fold) is the score used to select the best model.
<p> The following is only one of the grid searches performed.

In [None]:
# 80% of the rows in tr_data (presumably the per-fold training size of the 5-fold CV);
# not referenced again in this cell.
fullBatch = tr_data.shape[0] * 0.8

# Hyperparameter grid: every key maps to the list of candidate values to combine.
grid = {
    "eta": [0.001, 0.005, 0.0001],
    "mb": [1, 8],
    "momentum": [0.9, 0.6, 0.5, 0.7, 0.8, 0.95],
    "n_layers": [2],
    "n_neurons": [50, 100, 150, 200],
    "epochs": [500],
    "clip_value": [None],
    "hid_act_fun": ["tanh"],
    "out_act_fun": ["linear"],
    "cost_fun": ["eucl"],
    "ridge_lambda": [1e-8],
    "lasso_lambda": [None],
    "decay_max_steps": [None, 100],
    "decay_min_value": [10],
    "es_patience": [30],
}

search_space = get_search_space(grid)
# Number of configurations that will be evaluated
print(len(search_space))
parallel_grid_search(
    k=5,
    data=tr_data,
    es_data=es_data,
    search_space=search_space,
    n_inputs=10,
    n_outputs=3,
    type="cup",
)

In [3]:
# Hyperparameter grid: every key maps to the list of candidate values to combine.
grid = {
    "eta": [0.0001],
    "mb": [2],
    "momentum": [0.6, 0.8, 0.9, 0.95, 0.97],
    "n_layers": [1],
    "n_neurons": [400],
    "epochs": [500],
    "clip_value": [None],
    "hid_act_fun": ["tanh"],
    "out_act_fun": ["linear"],
    "cost_fun": ["eucl"],
    "ridge_lambda": [1e-8, None],
    "lasso_lambda": [None],
    "decay_max_steps": [None, 100],
    "decay_min_value": [10],
    "es_patience": [50],
}

search_space = get_search_space(grid)
# Number of configurations that will be evaluated
print(len(search_space))
parallel_grid_search(
    k=5,
    data=tr_data,
    es_data=es_data,
    search_space=search_space,
    n_inputs=10,
    n_outputs=3,
    type="cup",
)

20
N_cores = 10
GRID SEARCH FINISHED
[(-1.051403430985486, (0.006811756518426486, 0.7436724756694684, {'eta': 0.0001, 'mb': 2, 'momentum': 0.8, 'n_layers': 1, 'n_neurons': 400, 'epochs': 500, 'clip_value': None, 'hid_act_fun': 'tanh', 'out_act_fun': 'linear', 'cost_fun': 'eucl', 'ridge_lambda': 1e-08, 'lasso_lambda': None, 'decay_max_steps': None, 'decay_min_value': 10, 'es_patience': 50})), (-1.0178300267773444, (0.0006289629169450103, 0.7317306262447325, {'eta': 0.0001, 'mb': 2, 'momentum': 0.97, 'n_layers': 1, 'n_neurons': 400, 'epochs': 500, 'clip_value': None, 'hid_act_fun': 'tanh', 'out_act_fun': 'linear', 'cost_fun': 'eucl', 'ridge_lambda': 1e-08, 'lasso_lambda': None, 'decay_max_steps': 100, 'decay_min_value': 10, 'es_patience': 50})), (-0.902090833738715, (0.0024748594902683175, 0.5923929214073844, {'eta': 0.0001, 'mb': 2, 'momentum': 0.9, 'n_layers': 1, 'n_neurons': 400, 'epochs': 500, 'clip_value': None, 'hid_act_fun': 'tanh', 'out_act_fun': 'linear', 'cost_fun': 'eucl', 'ri

Now we pick the best 5 models from the grid search and use them as an ensemble model.

The best results:

1. {'eta': 0.0001, 'mb': 1, 'momentum': 0.8, 'n_layers': 3, 'n_neurons': 200, 'epochs': 500, 'clip_value': None, 'hid_act_fun': 'tanh', 'out_act_fun': 'linear', 'cost_fun': 'eucl', 'ridge_lambda': 1e-08, 'lasso_lambda': None, 'decay_max_steps': None, 'decay_min_value': 10, 'es_patience': 30}<br>
Validation mean = 0.5776027113455716, Variance = 0.0026117155270263376<br>
Training mean (ES) = 0.2442599815747089

2. {'eta': 0.0001, 'mb': 1, 'momentum': 0.6, 'n_layers': 3, 'n_neurons': 200, 'epochs': 500, 'clip_value': None, 'hid_act_fun': 'tanh', 'out_act_fun': 'linear', 'cost_fun': 'eucl', 'ridge_lambda': 1e-08, 'lasso_lambda': None, 'decay_max_steps': None, 'decay_min_value': 10, 'es_patience': 30}<br>
Validation mean = 0.6003179533234724, Variance = 0.001471646127359402<br>
Training mean (ES) = 0.2625431767735682

3. {'eta': 0.0001, 'mb': 1, 'momentum': 0.9, 'n_layers': 2, 'n_neurons': 200, 'epochs': 500, 'clip_value': None, 'hid_act_fun': 'tanh', 'out_act_fun': 'linear', 'cost_fun': 'eucl', 'ridge_lambda': None, 'lasso_lambda': None, 'decay_max_steps': None, 'decay_min_value': 10, 'es_patience': 30}<br>
Validation mean = 0.6108271204955864, Variance = 0.002266016486143601<br>
Training mean (ES) = 0.25655232735599054

4. {'eta': 0.0001, 'mb': 8, 'momentum': 0.9, 'n_layers': 3, 'n_neurons': 200, 'epochs': 500, 'clip_value': None, 'hid_act_fun': 'tanh', 'out_act_fun': 'linear', 'cost_fun': 'eucl', 'ridge_lambda': None, 'lasso_lambda': None, 'decay_max_steps': None, 'decay_min_value': 10, 'es_patience': 30}<br>
Validation mean = 0.617711172518093, Variance = 0.0056700694722032425<br>
Training mean (ES) = 0.26035793552839165

5. {'eta': 0.0001, 'mb': 1, 'momentum': 0.5, 'n_layers': 3, 'n_neurons': 200, 'epochs': 500, 'clip_value': None, 'hid_act_fun': 'tanh', 'out_act_fun': 'linear', 'cost_fun': 'eucl', 'ridge_lambda': None, 'lasso_lambda': None, 'decay_max_steps': None, 'decay_min_value': 10, 'es_patience': 30}<br>
Validation mean = 0.6335390644035537, Variance = 0.004222898732823709<br>
Training mean (ES) = 0.2599176722840955


## Ensemble

Next, a k-fold cross-validation must be run on the best single model (with and without early stopping), in both cases without using es_data.
The same process is then repeated for the ensemble.
The model with the best validation MEE will be the final model; it will be trained on 800 entries and tested on the internal test set.

In [None]:
# Model initialisation:
# the models are to be added to an array of NNs.

# lr decay is not used as a parameter because none of the best 5 models uses it
params = {
    "eta": [0.001, 0.0001],
    "mb": [1, 8],
    "momentum": [0.9, 0.75, 0.5],
    "es_stop": [],
    "hid_act_fun": "tanh",
    "out_act_fun": "linear",
    "cost_fun": "mse",
    "ridge_lambda": [None, 1e-8],
}


## Best model
Using the 5-fold MEE on the internal validation set, we compare the best single model with ES, the same model without ES, and the ensemble of the best 5 models.

In [4]:
# 5-fold cross-validation of the best single model, with early stopping
parameters = {
    "eta": 0.0001,
    "mb": 1,
    "momentum": 0.8,
    "n_layers": 3,
    "n_neurons": 200,
    "epochs": 2000,
    "clip_value": None,
    "hid_act_fun": "tanh",
    "out_act_fun": "linear",
    "cost_fun": "eucl",
    "ridge_lambda": 1e-08,
    "lasso_lambda": None,
    "decay_max_steps": None,
    "decay_min_value": 10,
    "es_patience": 30,
}

# Fold the early-stopping split back into the training data. The name tr_data is
# rebound on purpose (the following cell reads it). drop_duplicates() makes the
# cell safe to re-run: rows appended by a previous execution are discarded instead
# of being added a second time. It compares full rows, so it would also drop
# genuinely identical samples, which is not expected for this dataset.
tr_data = pd.concat([tr_data, es_data]).drop_duplicates()

# Stopping threshold; close to the "Training mean (ES)" reported for this model (~0.2443).
ES_STOP = 0.244
k_fold(5, tr_data, parameters, None, "cup", 3, 10, es_stop=ES_STOP)



SyntaxError: invalid syntax (3377777339.py, line 21)

In [None]:
# 5-fold cross-validation of the best single model, without early stopping
parameters = {
    "eta": 0.0001,
    "mb": 1,
    "momentum": 0.8,
    "n_layers": 3,
    "n_neurons": 200,
    "epochs": 2000,
    "clip_value": None,
    "hid_act_fun": "tanh",
    "out_act_fun": "linear",
    "cost_fun": "eucl",
    "ridge_lambda": 1e-08,
    "lasso_lambda": None,
    "decay_max_steps": None,
    "decay_min_value": 10,
    "es_patience": 30,
}

# es_stop=None: no stopping threshold is passed for this run
k_fold(5, tr_data, parameters, None, "cup", 3, 10, es_stop=None)

In [None]:
# Single configuration used for retraining. n_neurons is used below as a layer
# width, so it must be a scalar and not a list of grid candidates; the other
# entries are assumed to be read as scalars by net.train() as well, as in the
# k_fold cells (TODO confirm).
# Entries that had one candidate in this cell's original grid keep that value;
# eta, mb, momentum, n_neurons and ridge_lambda take the values of the best
# grid-search result; epochs is 2000 as stated in the comments below.
# NOTE(review): cost_fun stays "mse" as in this cell's original grid, while the
# grid-search winner was trained with "eucl" — confirm which loss to optimise.
parameters = {
    "eta": 0.0001,
    "mb": 1,
    "momentum": 0.8,
    "n_layers": 3,
    "n_neurons": 200,
    "epochs": 2000,
    "clip_value": None,
    "hid_act_fun": "tanh",
    "out_act_fun": "linear",
    "cost_fun": "mse",
    "ridge_lambda": 1e-8,
    "lasso_lambda": None,
    "decay_max_steps": None,
    "decay_min_value": 10,
    "es_patience": 30,
}

# plot_loss_Cup needs the test and training loss vectors;
# the same is repeated for MEE,
# and also without ES — 2000 epochs in both cases.

# To retrain the model, both the internal training split and the early-stopping
# split are used as the training set. drop_duplicates() keeps the result correct
# whether or not an earlier cell has already merged es_data into tr_data.
tr_set = pd.concat([tr_data, es_data]).drop_duplicates()
# The internal test split is the evaluation set during retraining.
test_set = test_data
n_layers = parameters["n_layers"]
n_neurons = parameters["n_neurons"]

# 2000-epoch retraining without early stopping.
# `import NeuralNetwork` binds the module, so the class is reached as an attribute
# (assumes the class is named like its module — TODO confirm).
net = NeuralNetwork.NeuralNetwork(type="cup")
net.add_input_layer(10)
net.add_hidden_layer(10, n_neurons)
net.add_hidden_layer(n_neurons, n_neurons)
net.add_output_layer(n_neurons, 3)
# NOTE(review): two hidden layers are hard-coded while parameters["n_layers"] is 3
# and n_layers is otherwise unused — confirm the intended topology.

test_mse, train_mse, test_mee, train_mee = net.train(
    tr_set, parameters, test_data=test_set, outFun2="eucl", progress_bar=True
)
# NOTE(review): per the comment above, plot_loss_Cup expects the loss vectors, but its
# signature is not visible in this notebook — pass the curves once it is confirmed.
plot_loss_Cup()

In [None]:
# Now with early stopping: training stops when the MEE on the training set goes under ES_STOP.
# 0.244 is the threshold already used for the 5-fold run with early stopping.
ES_STOP = 0.244

# Fresh network, so the early-stopping run starts from untrained weights.
# `import NeuralNetwork` binds the module, so the class is reached as an attribute
# (assumes the class is named like its module — TODO confirm).
net = NeuralNetwork.NeuralNetwork(type="cup")
net.add_input_layer(10)
net.add_hidden_layer(10, n_neurons)
net.add_hidden_layer(n_neurons, n_neurons)
net.add_output_layer(n_neurons, 3)

# Retraining with early stopping. The network is trained exactly once, with es_stop set;
# a preliminary run without es_stop on the same net would leave it already trained
# before the early-stopping run begins.
test_mse, train_mse, test_mee, train_mee = net.train(
    tr_set, parameters, test_data=test_set, outFun2="eucl", progress_bar=True, es_stop=ES_STOP
)
# NOTE(review): plot_loss_Cup's signature is not visible in this notebook — it presumably
# needs the loss vectors returned above; confirm and pass them.
plot_loss_Cup()
