# Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import itertools
import torch
from torch import nn
torch.backends.cudnn.benchmark = False
from torch.utils.data.sampler import WeightedRandomSampler
from torch.utils.data import Dataset, DataLoader, Subset, TensorDataset
from torch.utils.tensorboard import SummaryWriter
from torch.utils.tensorboard.summary import hparams
from pprint import pprint
from sklearn.metrics import precision_recall_fscore_support as score

from torchinfo import summary
from textwrap import dedent

from urllib.request import urlretrieve

import os

# Run on the GPU whenever CUDA is usable, otherwise fall back to the CPU.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)
print(f"Device: {device}")

Device: cuda


The following function ensures reproducibility across experiments.

In [4]:
def set_reproducibility(seed = 42):
	"""Seed the PyTorch and NumPy random generators and enable deterministic algorithms.

	Args:
		seed: value handed to both `torch.manual_seed` and `np.random.seed`.
	"""
	# Workspace setting for cuBLAS; exported before deterministic mode is enabled
	# (see the PyTorch reproducibility notes for why CUDA needs it).
	os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
	torch.use_deterministic_algorithms(True)

	for seed_generator in (torch.manual_seed, np.random.seed):
		seed_generator(seed)

# Design

## Class for data loading and pre-processing

By instantiating this class, we load the dataset produced by the *Data Manipulation* section of the notebook, clean it with the same operations performed in the *Data Cleaning* section, split it into `X`, `y` and `weights` (the `ratings_count` column) and, finally, discretize the continuous label.

In [2]:
class MoviesDataset(Dataset):
	"""Movies dataset exposing features, discretized rating class and rating count.

	On construction the CSV `datasets/df.csv` is loaded (and downloaded first
	when it is missing), cleaned, split into features / label / weights and the
	continuous label `rating_mean` is binned into classes.

	Attributes set by `__init__`:
		num_classes: number of distinct label classes actually present.
		X: float tensor with one row of features per sample.
		y: long tensor with the class index of each sample.
		weights: float tensor with the `ratings_count` of each sample.
	"""

	def __init__(self):
		csv_path = "datasets/df.csv"
		try:
			df = pd.read_csv(csv_path)
		except FileNotFoundError:
			print("Download in progress of df.csv")
			# urlretrieve does not create missing parent directories.
			os.makedirs(os.path.dirname(csv_path), exist_ok=True)
			urlretrieve(url = "http://github.com/MickPerl/DataAnalyticsProject/releases/download/datasets/df.csv", filename=csv_path)
			df = pd.read_csv(csv_path)

		df = self.cleaning(df)

		X, y, weights = self.split_XYweights(df)

		y = self.discretization(y)

		self.num_classes = y.nunique()
		self.X = torch.FloatTensor(X.values)
		# Convert through NumPy: building a tensor straight from a Series looks
		# items up by index label, which fails once cleaning has dropped rows.
		self.y = torch.as_tensor(y.to_numpy(), dtype=torch.long)
		self.weights = torch.as_tensor(weights.to_numpy(), dtype=torch.float32)

	def __len__(self):
		"""Return the number of samples."""
		return self.X.shape[0]

	def __getitem__(self, idx):
		"""Return the `(features, label, weight)` triple stored at `idx`."""
		return self.X[idx, :], self.y[idx], self.weights[idx]

	def split_XYweights(self, df):
		"""Split `df` into features, the `rating_mean` label and `ratings_count` weights.

		Returns:
			`(X, y, weights)` where `X` is `df` without the two other columns.
		"""
		y = df['rating_mean']
		weights = df['ratings_count']
		X = df.drop(columns=['ratings_count', 'rating_mean'])
		return X, y, weights

	def cleaning(self, df):
		"""Clean `df` in place and return it.

		Steps: drop rows without a label; drop movies that have neither tags nor
		a genre; fill the remaining missing values of columns 23:-2 with 0; drop
		the `(no genres listed)` column; impute missing years with the median;
		drop `movieId`; drop duplicated rows.
		"""
		df.dropna(subset=['rating_mean'], inplace=True)

		# Columns 23:-2 are presumably the tag columns (the local names below
		# suggest so) -- confirm against the CSV layout.
		df_without_tags = df[df.iloc[:, 23:-2].isna().all(axis=1)]
		df_without_tags_nor_genres = df_without_tags[df_without_tags['(no genres listed)'] == 1]
		rows_to_be_deleted = df.loc[df["movieId"].isin(df_without_tags_nor_genres["movieId"])].index
		df.drop(rows_to_be_deleted, axis=0, inplace=True)

		df.iloc[:, 23:-2] = df.iloc[:, 23:-2].fillna(0)
		df.drop(columns=['(no genres listed)'], inplace=True)

		# Series.median() already ignores missing values.
		df['year'] = df['year'].fillna(df['year'].median()).astype('int')

		df.drop(columns=['movieId'], inplace=True)
		df.drop_duplicates(inplace=True)
		return df

	def discretization(self, series):
		"""Bin `series` into 5 equal-width intervals and return the bin indices (0-4)."""
		return pd.cut(series, bins=5, labels=False)

## Class for the network architecture

By instantiating this class, we build the network architecture.\
The architecture is highly parametrized: in particular, it is possible to specify the activation function of the first layer, that of the hidden layers and that of the output layer, as well as the number of hidden layers, the dropout probability and whether batch normalization is used.

In [37]:
class Feedforward(nn.Module):
    """Configurable fully connected network wrapped in an `nn.Sequential`.

    Layout: `Linear(input_size, hidden_size)` + `af_first_layer`, then
    `num_hidden_layers` repetitions of `Linear` -> optional `BatchNorm1d` ->
    `af_hidden_layers` -> optional `Dropout`, and finally
    `Linear(hidden_size, num_classes)` followed by `af_output_layer` when one
    is given.

    Args:
        input_size: number of input features.
        hidden_size: width of every hidden layer.
        num_classes: number of output units.
        af_first_layer: activation module placed after the first linear layer.
        af_hidden_layers: activation module reused after every hidden linear layer.
        af_output_layer: activation module for the output, or a falsy value for none.
        num_hidden_layers: how many hidden blocks to stack.
        dropout: dropout probability; `0` disables dropout.
        batch_norm: whether each hidden block contains a `BatchNorm1d`.
    """

    def __init__(self, input_size, hidden_size, num_classes, af_first_layer, af_hidden_layers, af_output_layer, num_hidden_layers, dropout, batch_norm):
        super().__init__()

        layers = [nn.Linear(input_size, hidden_size), af_first_layer]

        for _ in range(num_hidden_layers):
            hidden_block = [nn.Linear(hidden_size, hidden_size)]
            if batch_norm:
                hidden_block.append(nn.BatchNorm1d(hidden_size))
            hidden_block.append(af_hidden_layers)
            if dropout != 0:
                hidden_block.append(nn.Dropout(dropout))
            layers.extend(hidden_block)

        layers.append(nn.Linear(hidden_size, num_classes))
        if af_output_layer:
            layers.append(af_output_layer)

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Feed `x` through the stacked layers and return the network output."""
        return self.model(x)


## Training function

In [38]:
def get_num_correct(preds, labels):
    """Return how many rows of `preds` have their arg-max (dim 1) equal to `labels`."""
    return preds.argmax(dim=1).eq(labels).sum().item()

def train_model(model, criterion, optimizer, data_loader, epochs, n_bad_epochs, device, tb, cardinality_training_set):
	"""Train `model` with early stopping on the mean training loss.

	Args:
		model: network to train; it is switched to training mode.
		criterion: loss module. `nn.CrossEntropyLoss` receives the class indices,
			any other criterion receives the one-hot encoded targets.
		optimizer: optimizer already bound to the parameters of `model`.
		data_loader: iterable of batches whose first two items are data and targets.
		epochs: maximum number of epochs (at least 1).
		n_bad_epochs: number of epochs without a new minimum of the mean loss
			after which training stops. The first 5 epochs are never counted.
		device: device on which each batch is moved.
		tb: writer exposing `add_scalar` and `add_histogram`.
		cardinality_training_set: denominator used for the per-epoch accuracy.

	Returns:
		`(model, loss_values, epoch, mean_loss_current_epoch, accuracy_current_epoch)`
		where `loss_values` holds the loss of every batch of every epoch and the
		last three items refer to the last epoch that was run.

	Raises:
		ValueError: if `epochs` is smaller than 1.
	"""
	if epochs < 1:
		raise ValueError("epochs must be at least 1")

	model.train()

	loss_values = []	# loss of every batch, regardless of the epoch: returned after training

	patience = 0
	min_loss = float("inf")	# np.Inf no longer exists in NumPy 2.0

	targets_are_class_indices = isinstance(criterion, nn.CrossEntropyLoss)
	batches_per_epoch = len(data_loader)

	for epoch in range(epochs):
		losses_batches_current_epoch = []	# losses of this epoch only, used by the early stopping check
		correct_batches_current_epoch = []

		for batch_idx, samples in enumerate(data_loader):
			data, targets = samples[0].to(device), samples[1].to(device)
			optimizer.zero_grad()

			# Forward pass
			y_pred = model(data)

			# Compute loss
			if targets_are_class_indices:
				loss = criterion(y_pred, targets)
			else:	# e.g. KLDivLoss, which compares two distributions
				# The number of classes is read from the network output instead of being hard-coded.
				targets_one_hot_encoded = torch.nn.functional.one_hot(targets, num_classes=y_pred.shape[1]).float()
				loss = criterion(y_pred, targets_one_hot_encoded)

			correct = get_num_correct(y_pred, targets)

			global_step = epoch * batches_per_epoch + batch_idx + 1
			tb.add_scalar("Loss every batch", loss.item(), global_step)
			tb.add_scalar("Correct every batch", correct, global_step)
			tb.add_scalar("Accuracy every batch", correct / len(data), global_step)

			loss_values.append(loss.item())
			losses_batches_current_epoch.append(loss.item())
			correct_batches_current_epoch.append(correct)

			# Backward pass
			loss.backward()

			# Batches producing nan/inf gradients are skipped rather than removed
			# from the data. All gradients are inspected first and reset once
			# afterwards: resetting inside the loop may turn `param.grad` into
			# None and make the following check fail.
			valid_gradients = True
			for name, param in model.named_parameters():
				if param.grad is None:
					continue
				if torch.isnan(param.grad).any():
					print(f"{name} is nan, so model parameters are not going to be updated: this batch is skipped and the gradient is reset.")
					valid_gradients = False
				if torch.isinf(param.grad).any():
					print(f"{name} is inf, so model parameters are not going to be updated: this batch is skipped and the gradient is reset.")
					valid_gradients = False
			if not valid_gradients:
				optimizer.zero_grad()
				continue

			optimizer.step()

		total_correct_current_epoch = np.sum(correct_batches_current_epoch)
		tb.add_scalar("Correct every epoch", total_correct_current_epoch, epoch)

		accuracy_current_epoch = total_correct_current_epoch / cardinality_training_set
		tb.add_scalar("Accuracy every epoch", accuracy_current_epoch, epoch)

		for name, param in model.named_parameters():
			name = name.replace('.', '/')
			tb.add_histogram('every epoch_' + name, param.data.cpu().detach().numpy(), epoch)
			# The gradient is missing when the last batch of the epoch was skipped.
			if param.grad is not None:
				tb.add_histogram('every epoch_' + name + '/grad', param.grad.data.cpu().numpy(), epoch)

		mean_loss_current_epoch = np.mean(losses_batches_current_epoch)

		# Warm-up: the first 5 epochs never contribute to early stopping.
		if epoch < 5 :
			print(f"Epoch: {epoch}\t Mean Loss: {mean_loss_current_epoch}")
			continue

		if epoch == 5 :
			print("Waiting for three consecutive epochs during which the mean loss over batches does not decrease...")

		if mean_loss_current_epoch < min_loss:
			patience = 0
			min_loss = mean_loss_current_epoch
		else:
			patience += 1

		print(f"Epoch: {epoch}\t Mean Loss: {mean_loss_current_epoch}\t Current min mean loss: {min_loss}")

		if patience == n_bad_epochs:
			print(f"Early stopped at {epoch}-th epoch, since the mean loss over batches didn't decrease during the last {n_bad_epochs} epochs")
			break

	return model, loss_values, epoch, mean_loss_current_epoch, accuracy_current_epoch

# Testing function

In [39]:
def test_model(model, data_loader, device, output_dict = False):
	"""Evaluate `model` on `data_loader` and return a scikit-learn classification report.

	The model is switched to evaluation mode and is left in that mode.

	Args:
		model: network to evaluate.
		data_loader: iterable of batches whose first two items are data and
			targets; any batch size is supported, including a smaller last batch.
		device: device on which each batch is moved.
		output_dict: forwarded to `classification_report`; when true a dict is
			returned instead of a string.

	Returns:
		The value returned by `classification_report`.
	"""
	model.eval()
	y_pred = []
	y_test = []

	# No gradient is needed for evaluation: do not keep the autograd graph alive.
	with torch.no_grad():
		for samples in data_loader:
			data, targets = samples[0].to(device), samples[1].to(device)
			y_pred.append(model(data).argmax(dim=1))
			y_test.append(targets)

	# Concatenating along the batch dimension works for every batch size,
	# whereas stacking + squeezing only did for batches of a single sample.
	y_pred = torch.cat(y_pred)
	y_test = torch.cat(y_test)
	return classification_report(y_test.cpu(), y_pred.cpu(), zero_division=0, output_dict=output_dict)

## Utilities

def class_weights(y):
    """Return, for each label in `y`, the inverse of the frequency of its class.

    Args:
        y: 1-D tensor of non-negative integer class labels.

    Returns:
        Float tensor shaped like `y` holding `1 / count(class)` per sample.
    """
    samples_per_class = torch.bincount(y)
    inverse_frequency = samples_per_class.to(torch.get_default_dtype()).reciprocal()
    return inverse_frequency[y]

Due to a bug in the PyTorch port of TensorBoard, we subclass `SummaryWriter` and override its `add_hparams` method with some modifications.

In [None]:
class SummaryWriter(SummaryWriter):
	"""`SummaryWriter` whose `add_hparams` logs through the writer's own `file_writer`."""

	def add_hparams(self, hparam_dict, metric_dict):
		"""Log a set of hyperparameters together with their metrics.

		Args:
			hparam_dict: dict mapping hyperparameter names to values.
			metric_dict: dict mapping metric names to values; entries whose value
				is `None` are declared but no scalar is written for them.

		Raises:
			TypeError: if either argument is not exactly a `dict`.
		"""
		torch._C._log_api_usage_once("tensorboard.logging.add_hparams")

		both_are_dicts = type(hparam_dict) is dict and type(metric_dict) is dict
		if not both_are_dicts:
			raise TypeError('hparam_dict and metric_dict should be dictionary.')

		# hparams() returns three summaries; they are written in the order returned.
		for hparams_summary in hparams(hparam_dict, metric_dict):
			self.file_writer.add_summary(hparams_summary)

		for tag, value in metric_dict.items():
			if value is None:
				continue
			self.add_scalar(tag, value)

In [None]:
def dict_configs_from_params_cartesian_product(hyperparams) :
	"""Expand a hyperparameter space into the list of admissible configurations.

	Args:
		hyperparams: dict mapping each hyperparameter name to the list of its
			candidate values. It must contain the keys `batch_norm`, `batch_size`,
			`loss_function`, `af_output_layer`, `dropout` and `hidden_size`.

	Returns:
		List of dicts, one per element of the cartesian product of the candidate
		values (in `itertools.product` order) that passes the filters below.
	"""
	name_params = list(hyperparams)

	def _is_discarded(config):
		"""Tell whether `config` is a combination that must not be trained."""
		# Batch normalization over fewer than 32 samples: the batch statistics
		# are not statistically significant.
		if config['batch_norm'] and config['batch_size'] < 32:
			return True

		loss_name = str(config['loss_function'])

		# CrossEntropyLoss already applies its own (log-)softmax, so no
		# activation function must follow the output layer.
		if loss_name == "CrossEntropyLoss()" and config['af_output_layer'] is not None:
			return True

		# KLDivLoss compares probability distributions: only LogSoftmax(dim=1)
		# is accepted as activation function of the output layer.
		if loss_name == "KLDivLoss()" and str(config['af_output_layer']) != "LogSoftmax(dim=1)":
			return True

		# Strong dropout only on wide layers, light dropout only on narrow ones.
		if config['dropout'] == 0.5 and config['hidden_size'] < 64:
			return True
		if config['dropout'] == 0.2 and config['hidden_size'] > 32:
			return True

		return False

	all_configs = (
		dict(zip(name_params, values))
		for values in itertools.product(*hyperparams.values())
	)
	return [config for config in all_configs if not _is_discarded(config)]


def split_configs_params(dict_configs, nr_sets = 4):
	"""Split the configurations into `nr_sets` equally sized sets.

	Every set is published as the module-level variable `configs_set{i}`
	(i = 0 .. nr_sets - 1) and its name is printed.

	Args:
		dict_configs: sequence of configuration dicts.
		nr_sets: number of sets; it must divide `len(dict_configs)`.

	Returns:
		The list of sets, in the same order as the `configs_set{i}` variables.

	Raises:
		AssertionError: if `nr_sets` does not divide the number of configurations.
	"""
	assert len(dict_configs) % nr_sets == 0,  "The number of config sets has to be a divisor of the total number of configs."
	print(f"Newly created sets (ratio {nr_sets}:1 to all {len(dict_configs)} configs):")

	# Split once instead of repeating the whole split for every set.
	config_sets = np.array_split(dict_configs, nr_sets)
	for i, config_set in enumerate(config_sets):
		globals()[f"configs_set{i}"] = config_set
		print(f"configs_set{i}")

	return config_sets

# Neural Network in action

## Creation training, validation and test set

In [3]:
dataset = MoviesDataset()

# Stratified split: 20% of the samples for testing, then 10% of the rest for validation.
_all_idx = np.arange(len(dataset))
train_idx, test_idx = train_test_split(_all_idx, test_size=0.2, stratify=dataset.y, random_state=42)
train_idx, val_idx = train_test_split(train_idx, test_size=0.1, stratify=dataset.y[train_idx], random_state=42)

# Unscaled copies of the three splits (indexing with an index array copies the rows).
X_train, X_val, X_test = (dataset.X[_idx] for _idx in (train_idx, val_idx, test_idx))
_splits = ((train_idx, X_train), (val_idx, X_val), (test_idx, X_test))

# Min-max scaling of column 0 (year) and column 1 (title_length): minimum and
# maximum always come from the training split only.
train_year_max = torch.max(X_train[:,0])
train_year_min = torch.min(X_train[:,0])
for _idx, _features in _splits:
    dataset.X[_idx, 0] = (_features[:,0] - train_year_min)/(train_year_max - train_year_min)

train_title_length_max = torch.max(X_train[:,1])
train_title_length_min = torch.min(X_train[:,1])
for _idx, _features in _splits:
    dataset.X[_idx, 1] = (_features[:,1] - train_title_length_min)/(train_title_length_max - train_title_length_min)

## Managing imbalance

In [42]:
# Sampler that counteracts class imbalance: every training sample is weighted
# with the inverse of the frequency of its class.
y_train = dataset.y[train_idx]
sample_weights = class_weights(y_train)
sampler_class_frequency = WeightedRandomSampler(weights=sample_weights, num_samples=len(train_idx))

In [None]:
# MinMaxScaling ratings_count
#       weights_train = dataset.weights[train_idx] 
#       weights_val = dataset.weights[val_idx]
#       weights_test = dataset.weights[test_idx] 
#       
#       weights_train_max = torch.max(weights_train)
#       weights_train_min = torch.min(weights_train)
#       dataset.weights[train_idx]  = (weights_train - weights_train_min) / (weights_train_max - weights_train_min)
#       dataset.weights[val_idx] = (weights_val - weights_train_min) / (weights_train_max - weights_train_min)
#       dataset.weights[test_idx] = (weights_test - weights_train_min) / (weights_train_max - weights_train_min)
#       
#       sampler_ratings_count = WeightedRandomSampler(dataset.weights[train_idx], len(train_idx))

In [47]:
# Class distribution inside each mini-batch drawn by the weighted sampler:
# one line per batch, one tab-separated count per class (0 to 4).
train_subset = Subset(dataset, train_idx)
train_loader = DataLoader(train_subset, batch_size=128, shuffle=False, sampler=sampler_class_frequency, drop_last=True)

for samples in train_loader:
    batch_labels = samples[1].numpy()
    print(*(int((batch_labels == label).sum()) for label in range(5)), sep = "\t")

23	30	23	24	28
29	25	23	27	24
24	24	30	24	26
30	22	28	27	21
36	25	23	18	26
29	19	21	28	31
27	30	26	21	24
21	31	29	26	21
22	28	25	27	26
27	23	29	22	27
18	27	31	30	22
25	23	22	25	33
25	29	27	22	25
25	22	27	31	23
26	27	16	27	32
28	25	28	20	27
26	26	25	24	27
24	26	25	29	24
16	26	31	31	24
23	21	28	28	28
30	23	17	22	36
25	24	24	23	32
32	18	29	26	23
24	30	26	25	23
25	19	30	24	30
22	19	30	29	28
16	25	33	26	28
26	30	23	21	28
21	28	28	26	25
29	23	23	25	28
30	15	25	40	18
20	25	31	26	26
25	27	27	20	29
27	30	19	27	25
19	31	35	24	19
22	26	24	28	28
38	23	18	25	24
28	18	27	31	24
20	26	29	26	27
18	22	30	29	29
32	22	21	26	27
26	24	30	30	18
26	29	19	29	25
25	21	34	24	24
30	28	21	27	22
29	23	23	34	19
25	21	31	21	30
22	29	25	27	25
34	22	21	25	26
31	22	26	24	25
29	25	18	24	32
26	24	24	28	26
27	32	22	21	26
28	22	28	25	25
19	21	22	33	33
28	28	26	22	24
28	30	21	22	27
21	29	22	30	26
27	31	23	21	26
23	21	31	31	22
37	19	27	27	18
27	24	27	23	27
24	25	23	31	25
17	26	28	28	29
22	27	29	26	24
28	24	36	22	18
19	25	23	3

## Defining first hyperparameters space

In [13]:
# First search space: every key maps to the list of candidate values of one
# hyperparameter. The training cell below expands it with
# dict_configs_from_params_cartesian_product, which also drops some combinations.
first_hyperparams = {
	'num_epochs' : [500],	# upper bound on the epochs of a run
	'n_bad_epochs': [3],	# early stopping patience
	'num_hidden_layers' : [1, 3, 5, 7],
	'hidden_size' : [8, 16, 32, 64, 128],
	'batch_size' : [16, 32, 64, 128, 256],
	'af_first_layer' : [nn.Tanh(), nn.LeakyReLU()],
	'af_hidden_layers' : [nn.LeakyReLU()],
	'af_output_layer' : [None, nn.LogSoftmax(dim=1)],	# None: no activation after the output layer
	'loss_function' : [nn.CrossEntropyLoss(), nn.KLDivLoss(reduction = 'batchmean')], 
	'dropout' : [0, 0.2, 0.5],	# 0 disables dropout
	'batch_norm' : [False, True],
	'learning_rate' : [0.01, 0.001], 
	'optimizer': ["torch.optim.SGD", "torch.optim.Adam"],	# dotted name of the optimizer class
	'weight_decay': [1e-4]		
}

## First training

In [None]:
# Fine-tuning over one of the sets the first search space is split into: every
# configuration of the set is trained, evaluated on the validation split and
# logged both to TensorBoard and to the `results_first_ft` table.
set_reproducibility()

first_configs = dict_configs_from_params_cartesian_product(first_hyperparams)
nr_sets = 7
split_configs_params(first_configs, nr_sets)

idx_set = 1
assert idx_set < nr_sets, f"You can specify a set with an index until {nr_sets-1}"
# split_configs_params publishes every set as the module-level name `configs_set{i}`.
config_set = globals()[f"configs_set{idx_set}"]

# Trainings are numbered globally across the sets; the first set (and thus the
# case of a single set holding all the configs) starts from 0.
nr_train = len(configs_set0) * idx_set

columns = ["nr_train"] + list(first_configs[0].keys()) + ["epoch_stopped", "loss", "accuracy", "precision", "precision_total", "recall", "recall_total", "f1_score", "f1_score_total", "support"]
result_rows = []
results_first_ft = pd.DataFrame(columns=columns)

# Everything that does not depend on the configuration is built once.
n_features = dataset.X.shape[1]
cardinality_training_set = len(X_train)
train_subset = Subset(dataset, train_idx)
val_subset = Subset(dataset, val_idx)
test_subset = Subset(dataset, test_idx)
val_loader = DataLoader(val_subset, batch_size=1, shuffle=False, drop_last=True)
test_loader = DataLoader(test_subset, batch_size=1, shuffle=False, drop_last=True)

for config_params in config_set:
	nr_train += 1
	print(f"{nr_train}° training with params:")
	pprint(config_params)

	list_params_config = list(map(str, list(config_params.values())))
	name_run = '__'.join(list_params_config)
	log_dir = os.path.join('tensorboard_logs', f"{idx_set}_out_of_{nr_sets - 1}", 'Train_' + str(nr_train), name_run)

	# The context manager closes (and therefore flushes) the writer on exit.
	with SummaryWriter(log_dir=log_dir) as tb:
		train_loader = DataLoader(train_subset, batch_size=config_params['batch_size'], shuffle=False, sampler=sampler_class_frequency, drop_last=True)

		model = Feedforward(
			n_features,
			config_params['hidden_size'],
			dataset.num_classes,
			config_params['af_first_layer'],
			config_params['af_hidden_layers'],
			config_params['af_output_layer'],
			config_params['num_hidden_layers'],
			config_params['dropout'],
			config_params['batch_norm'])

		model.to(device)
		input_model = dataset.X[train_idx][:config_params['batch_size']].to(device)
		tb.add_graph(model, input_model)

		# The network consumes (batch, features) inputs; a 3-D shape with
		# hard-coded sizes does not fit the BatchNorm1d layers.
		# Try verbose=2 to also show weights and biases.
		summary(model, input_size=(config_params['batch_size'], n_features), col_names= ["input_size","output_size", "num_params"], verbose=1)

		loss_func = config_params['loss_function']

		# Resolve "torch.optim.<Name>" without eval, and apply the weight decay
		# that is part of the configuration (and of the logged run name).
		optimizer_class = getattr(torch.optim, config_params['optimizer'].rsplit('.', 1)[-1])
		optim = optimizer_class(model.parameters(), lr=config_params['learning_rate'], weight_decay=config_params['weight_decay'])

		model, loss_values, epoch_stopped, loss_value_last_epoch, accuracy_last_epoch = train_model(model, loss_func, optim, train_loader, config_params['num_epochs'], config_params['n_bad_epochs'], device, tb, cardinality_training_set)

		print(f"Loss: {loss_value_last_epoch}", end="\n\n")

		report = test_model(model, val_loader, device, True)
		# Besides one entry per class, the report holds 'accuracy', 'macro avg' and 'weighted avg'.
		index_classes = len(report) - 3
		class_keys = [str(i) for i in range(index_classes)]
		f1_score = [float(report[key]['f1-score']) for key in class_keys]
		f1_score_total = np.sum(f1_score)
		precision = [float(report[key]['precision']) for key in class_keys]
		precision_total = np.sum(precision)
		recall = [float(report[key]['recall']) for key in class_keys]
		recall_total = np.sum(recall)
		support = [int(report[key]['support']) for key in class_keys]
		accuracy = report['accuracy']

		row_values= [nr_train] + list_params_config + [epoch_stopped, loss_value_last_epoch, accuracy, precision, precision_total, recall, recall_total, f1_score, f1_score_total, support]
		# DataFrame.append no longer exists in pandas 2: the table is rebuilt from
		# the collected rows, so partial results survive an interrupted run.
		result_rows.append(row_values)
		results_first_ft = pd.DataFrame(result_rows, columns=columns)

		dict_params_config = dict(zip(config_params.keys(), list_params_config))
		tb.add_hparams(hparam_dict = dict_params_config, metric_dict = {"Accuracy every epoch": None, "Loss every epoch": None})

	# Release the per-configuration objects before the next run.
	del model, optim, train_loader

In [None]:
# Persist the results of this run. Comparing the NumPy array `config_set` with
# the list `first_configs` is not a valid condition: a single set holding all
# the configurations is recognised from the number of sets instead.
os.makedirs("tuning_hyperparams", exist_ok=True)
if nr_sets == 1:
	results_first_ft.to_csv("tuning_hyperparams/results_first_ft.csv", index=False)
else :
	results_first_ft.to_csv(f"tuning_hyperparams/results_nrSets{nr_sets}_idxSet{idx_set}.csv", index=False)

In [8]:
# Merge the six partial result files (one per configuration set) into one table.
_partial_results = [
    pd.read_csv(f"results_hyperparams_optimization/NN/results_nrSets6_idxSet{i}.csv")
    for i in range(6)
]
results_first_ft = pd.concat(_partial_results, ignore_index=True)

In [9]:
# Save the merged table of the first fine-tuning as a single CSV (row index omitted).
results_first_ft.to_csv("tuning_hyperparams/results_first_ft.csv", index=False)

In [10]:
# Display the merged results table as the output of the cell.
results_first_ft

Unnamed: 0,nr_train,num_epochs,n_bad_epochs,num_hidden_layers,hidden_size,batch_size,af_first_layer,af_hidden_layers,af_output_layer,loss_function,...,batch_norm,learning_rate,optimizer,epoch_stopped,loss,accuracy,precision,recall,f1_score,support
0,1,500,3,1,8,16,Tanh(),LeakyReLU(negative_slope=0.01),,CrossEntropyLoss(),...,False,0.010,torch.optim.SGD,18,2267.686243,0.383549,"[0.09982788296041308, 0.3317757009345794, 0.74...","[0.47540983606557374, 0.21194029850746268, 0.3...","[0.16500711237553342, 0.25865209471766853, 0.4...","[122, 335, 1143, 1416, 157]"
1,2,500,3,1,8,16,Tanh(),LeakyReLU(negative_slope=0.01),,CrossEntropyLoss(),...,False,0.010,torch.optim.Adam,8,2360.448489,0.369367,"[0.12181303116147309, 0.21195652173913043, 0.7...","[0.3524590163934426, 0.3492537313432836, 0.314...","[0.18105263157894738, 0.26381059751972946, 0.4...","[122, 335, 1143, 1416, 157]"
2,3,500,3,1,8,16,Tanh(),LeakyReLU(negative_slope=0.01),,CrossEntropyLoss(),...,False,0.001,torch.optim.SGD,56,2355.466820,0.367791,"[0.09498680738786279, 0.33116883116883117, 0.6...","[0.5901639344262295, 0.15223880597014924, 0.32...","[0.16363636363636364, 0.2085889570552147, 0.44...","[122, 335, 1143, 1416, 157]"
3,4,500,3,1,8,16,Tanh(),LeakyReLU(negative_slope=0.01),,CrossEntropyLoss(),...,False,0.001,torch.optim.Adam,26,2176.760260,0.384494,"[0.07963246554364471, 0.22418879056047197, 0.7...","[0.4262295081967213, 0.22686567164179106, 0.32...","[0.13419354838709677, 0.2255192878338279, 0.45...","[122, 335, 1143, 1416, 157]"
4,5,500,3,1,8,16,Tanh(),LeakyReLU(negative_slope=0.01),,CrossEntropyLoss(),...,False,0.010,torch.optim.SGD,33,2309.363213,0.369997,"[0.09691629955947137, 0.4016393442622951, 0.81...","[0.5409836065573771, 0.14626865671641792, 0.30...","[0.16438356164383564, 0.21444201312910288, 0.4...","[122, 335, 1143, 1416, 157]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5755,5756,500,3,7,128,256,LeakyReLU(negative_slope=0.01),LeakyReLU(negative_slope=0.01),LogSoftmax(dim=1),KLDivLoss(),...,False,0.001,torch.optim.Adam,13,144.306607,0.343839,"[0.07921714818266543, 0.5180722891566265, 0.84...","[0.6967213114754098, 0.12835820895522387, 0.25...","[0.14225941422594143, 0.20574162679425836, 0.3...","[122, 335, 1143, 1416, 157]"
5756,5757,500,3,7,128,256,LeakyReLU(negative_slope=0.01),LeakyReLU(negative_slope=0.01),LogSoftmax(dim=1),KLDivLoss(),...,True,0.010,torch.optim.SGD,23,161.961369,0.060511,"[0.04694113120430935, 0.0, 0.1079136690647482,...","[1.0, 0.0, 0.03937007874015748, 0.017655367231...","[0.08967291436971703, 0.0, 0.05769230769230769...","[122, 335, 1143, 1416, 157]"
5757,5758,500,3,7,128,256,LeakyReLU(negative_slope=0.01),LeakyReLU(negative_slope=0.01),LogSoftmax(dim=1),KLDivLoss(),...,True,0.010,torch.optim.Adam,26,141.734804,0.340687,"[0.10296191819464035, 0.6833333333333333, 0.74...","[0.5983606557377049, 0.12238805970149254, 0.29...","[0.17569193742478942, 0.20759493670886076, 0.4...","[122, 335, 1143, 1416, 157]"
5758,5759,500,3,7,128,256,LeakyReLU(negative_slope=0.01),LeakyReLU(negative_slope=0.01),LogSoftmax(dim=1),KLDivLoss(),...,True,0.001,torch.optim.SGD,33,169.741466,0.054523,"[0.042156185210780926, 0.0, 0.0789473684210526...","[1.0, 0.0, 0.005249343832020997, 0.03107344632...","[0.08090185676392572, 0.0, 0.00984413453650533...","[122, 335, 1143, 1416, 157]"


## Defining second hyperparameters space

In [14]:
# Second search space, same structure as the first one: every key maps to the
# list of candidate values of one hyperparameter.
# NOTE(review): the cell that trains on this space is not part of this file -- confirm.
second_hyperparams = {
	'num_epochs' : [500],	# upper bound on the epochs of a run
	'n_bad_epochs': [3],	# early stopping patience
	'num_hidden_layers' : [3, 5, 7, 10],
	'hidden_size' : [16, 64, 128, 256],
	'batch_size' : [16, 64, 256, 512],
	'af_first_layer' : [nn.Tanh(), nn.LeakyReLU()],
	'af_hidden_layers' : [nn.LeakyReLU()],
	'af_output_layer' : [None, nn.LogSoftmax(dim=1)],	# None: no activation after the output layer
	'loss_function' : [nn.CrossEntropyLoss(), nn.KLDivLoss(reduction = 'batchmean')], 
	'dropout' : [0, 0.5],	# 0 disables dropout
	'batch_norm' : [False, True],
	'learning_rate' : [0.01, 1e-5], 
	'optimizer': ["torch.optim.SGD", "torch.optim.Adam"],	# dotted name of the optimizer class
	'weight_decay': [1e-4]		
}

In [None]:
# Save the results of the second fine-tuning (row index omitted).
# NOTE(review): `results_second_ft` is not created anywhere in this file before this cell -- confirm where it comes from.
results_second_ft.to_csv("tuning_hyperparams/results_second_ft.csv", index=False)

In [6]:
# Reload the saved results of the second fine-tuning.
results_second_ft = pd.read_csv("tuning_hyperparams/results_second_ft.csv")

## Defining third hyperparameters space

In [22]:
# Third search space (kept under the name `new_new_hyperparams`): deeper
# networks and larger batches, with a single first-layer activation, learning
# rate and optimizer.
# NOTE(review): the cell that trains on this space is not part of this file -- confirm.
new_new_hyperparams = {
	'num_epochs' : [500],	# upper bound on the epochs of a run
	'n_bad_epochs': [3],	# early stopping patience
	'num_hidden_layers' : [12, 15, 18],
	'hidden_size' : [64, 128, 256],
	'batch_size' : [256, 512, 1024, 2048],
	'af_first_layer' : [nn.LeakyReLU()],
	'af_hidden_layers' : [nn.LeakyReLU()],
	'af_output_layer' : [None, nn.LogSoftmax(dim=1)],	# None: no activation after the output layer
	'loss_function' : [nn.CrossEntropyLoss(), nn.KLDivLoss(reduction = 'batchmean')], 
	'dropout' : [0, 0.5],	# 0 disables dropout
	'batch_norm' : [False, True],
	'learning_rate' : [1e-5], 
	'optimizer': ["torch.optim.Adam"],	# dotted name of the optimizer class
	'weight_decay': [1e-4]		
}

In [None]:
# Save the results of the third fine-tuning (row index omitted).
# NOTE(review): `results_third_ft` is not created anywhere in this file before this cell -- confirm where it comes from.
results_third_ft.to_csv("tuning_hyperparams/results_third_ft.csv", index=False)

In [5]:
# Reload the saved results of the third fine-tuning.
results_third_ft = pd.read_csv("tuning_hyperparams/results_third_ft.csv")

## Defining fourth hyperparameters space for regression task

In [15]:
# Search space for the regression variant: MSELoss is the only loss and no
# activation function follows the output layer.
# NOTE(review): the training cell right below iterates `config_set` and does not read this dict -- confirm that is intended.
regression_hyperparams = {
	'num_epochs' : [500],	# upper bound on the epochs of a run
	'n_bad_epochs': [3],	# early stopping patience
	'num_hidden_layers' : [3, 5, 7, 10],
	'hidden_size' : [64, 128, 256],
	'batch_size' : [16, 64, 256, 512],
	'af_first_layer' : [nn.Tanh(), nn.LeakyReLU()],
	'af_hidden_layers' : [nn.LeakyReLU()],
	'af_output_layer' : [None],
	'loss_function' : [nn.MSELoss()], 
	'dropout' : [0, 0.5],	# 0 disables dropout
	'batch_norm' : [False, True],
	'learning_rate' : [0.01, 1e-5], 
	'optimizer': ["torch.optim.Adam"],	# dotted name of the optimizer class
	'weight_decay': [1e-4]		
}

In [None]:
# Training loop that additionally measures how much the per-class metrics are
# spread. It relies on `config_set`, `first_configs`, `configs_set0`, `idx_set`
# and `nr_sets` defined by the first training cell.
# NOTE(review): the configurations come from `config_set`, not from
# `regression_hyperparams` -- confirm which search space is meant here.
set_reproducibility()


def MSE(metrics_per_class):
	"""Return the square root of the summed squared deviations from the mean.

	Despite its name this is not a mean squared error: it measures how much the
	per-class values in `metrics_per_class` are spread around their mean.
	"""
	values = np.asarray(metrics_per_class, dtype=float)
	return np.sqrt(np.sum(np.square(values - np.mean(values))))


# Trainings are numbered globally across the sets; the first set (and thus the
# case of a single set holding all the configs) starts from 0.
nr_train = len(configs_set0) * idx_set

columns = ["nr_train"] + list(first_configs[0].keys()) + ["epoch_stopped", "loss", "accuracy", "precision", "precision_total", "recall", "recall_total", "f1_score", "f1_score_total", "support"]
result_rows = []
results = pd.DataFrame(columns=columns)

# Everything that does not depend on the configuration is built once.
n_features = dataset.X.shape[1]
cardinality_training_set = len(X_train)
train_subset = Subset(dataset, train_idx)
val_subset = Subset(dataset, val_idx)
test_subset = Subset(dataset, test_idx)
val_loader = DataLoader(val_subset, batch_size=1, shuffle=False, drop_last=True)
test_loader = DataLoader(test_subset, batch_size=1, shuffle=False, drop_last=True)

for config_params in config_set:
	nr_train += 1
	print(f"{nr_train}° training with params:")
	pprint(config_params)

	list_params_config = list(map(str, list(config_params.values())))
	name_run = '__'.join(list_params_config)
	log_dir = os.path.join('tensorboard_logs', f"{idx_set}_out_of_{nr_sets - 1}", 'Train_' + str(nr_train), name_run)

	# The context manager closes (and therefore flushes) the writer on exit.
	with SummaryWriter(log_dir=log_dir) as tb:
		train_loader = DataLoader(train_subset, batch_size=config_params['batch_size'], shuffle=False, sampler=sampler_class_frequency, drop_last=True)

		model = Feedforward(
			n_features,
			config_params['hidden_size'],
			dataset.num_classes,
			config_params['af_first_layer'],
			config_params['af_hidden_layers'],
			config_params['af_output_layer'],
			config_params['num_hidden_layers'],
			config_params['dropout'],
			config_params['batch_norm'])

		model.to(device)
		input_model = dataset.X[train_idx][:config_params['batch_size']].to(device)
		tb.add_graph(model, input_model)

		# The network consumes (batch, features) inputs; a 3-D shape with
		# hard-coded sizes does not fit the BatchNorm1d layers.
		# Try verbose=2 to also show weights and biases.
		summary(model, input_size=(config_params['batch_size'], n_features), col_names= ["input_size","output_size", "num_params"], verbose=1)

		loss_func = config_params['loss_function']

		# Resolve "torch.optim.<Name>" without eval, and apply the weight decay
		# that is part of the configuration (and of the logged run name).
		optimizer_class = getattr(torch.optim, config_params['optimizer'].rsplit('.', 1)[-1])
		optim = optimizer_class(model.parameters(), lr=config_params['learning_rate'], weight_decay=config_params['weight_decay'])

		model, loss_values, epoch_stopped, loss_value_last_epoch, accuracy_last_epoch = train_model(model, loss_func, optim, train_loader, config_params['num_epochs'], config_params['n_bad_epochs'], device, tb, cardinality_training_set)

		print(f"Loss: {loss_value_last_epoch}", end="\n\n")

		report = test_model(model, val_loader, device, True)
		# Besides one entry per class, the report holds 'accuracy', 'macro avg' and 'weighted avg'.
		index_classes = len(report) - 3
		class_keys = [str(i) for i in range(index_classes)]

		f1_score = [float(report[key]['f1-score']) for key in class_keys]
		f1_score_total = np.sum(f1_score)
		f1_score_mse = MSE(f1_score)

		precision = [float(report[key]['precision']) for key in class_keys]
		precision_total = np.sum(precision)
		precision_mse = MSE(precision)

		recall = [float(report[key]['recall']) for key in class_keys]
		recall_total = np.sum(recall)
		recall_mse = MSE(recall)

		support = [int(report[key]['support']) for key in class_keys]
		accuracy = report['accuracy']

		# NOTE(review): the three *_mse values are computed but are not among
		# `columns`, so they do not reach `results` -- confirm whether they should.
		row_values= [nr_train] + list_params_config + [epoch_stopped, loss_value_last_epoch, accuracy, precision, precision_total, recall, recall_total, f1_score, f1_score_total, support]
		# DataFrame.append no longer exists in pandas 2: the table is rebuilt from
		# the collected rows, so partial results survive an interrupted run.
		result_rows.append(row_values)
		results = pd.DataFrame(result_rows, columns=columns)

		dict_params_config = dict(zip(config_params.keys(), list_params_config))
		tb.add_hparams(hparam_dict = dict_params_config, metric_dict = {"Accuracy every epoch": None, "Loss every epoch": None})

	# Release the per-configuration objects before the next run.
	del model, optim, train_loader

In [32]:
# Load the stored results of the regression experiments.
results_regression_ft = pd.read_csv("results_hyperparams_optimization/NN/results_NN_regression.csv")

In [None]:
def _per_class_spread(serialized_metrics):
	"""Parse a "[a, b, ...]" string and return the root of the summed squared deviations from the mean."""
	values = np.array(serialized_metrics[1:-1].split(", "), dtype=float)
	return np.sqrt(np.sum(np.square(values - values.mean())))


# NOTE(review): the original line was not valid Python (`mean_squared_error([1:-1])`)
# and iterated `results.f1_score`. It is rebuilt after the next cell, which
# derives the same measure from the stringified `new_results.precision`;
# confirm the intended source column.
new_results['mse_precision'] = [_per_class_spread(i) for i in new_results.precision]

In [21]:
# For every stored row, measure how far the per-class precision values are from
# their mean: square root of the summed squared deviations.
mse_list = []
for serialized_precision in new_results.precision:
	# Each cell holds a list rendered as text, e.g. "[0.1, 0.2, ...]".
	per_class = [float(token) for token in serialized_precision[1:-1].split(", ")]
	centre = np.mean(per_class)
	sum_errors_squared = sum(np.square(value - centre) for value in per_class)
	mse_list.append(np.sqrt(sum_errors_squared))

## Testing with best hyperparams

In [None]:
# Evaluate on the held-out test split; the report is shown as the cell output.
# NOTE(review): the training cells above end every iteration with `del model`, so a model trained with the chosen hyperparameters must exist before this cell can run -- confirm.
test_model(model, test_loader, device)

## Predicting

In [None]:
def predict(row, model):
    """Return the output of `model` for the single sample `row`.

    The sample is moved to the device of the model, the forward pass runs in
    evaluation mode without tracking gradients, and the previous training mode
    of the model is restored afterwards.

    Args:
        row: 1-D sequence (list, NumPy array, ...) with the features of one sample.
        model: `nn.Module` used for the prediction.

    Returns:
        NumPy array holding the model output for a batch of one sample.
    """
    first_parameter = next(model.parameters(), None)
    target_device = first_parameter.device if first_parameter is not None else torch.device("cpu")

    # Leading batch dimension of size 1, float32 as with torch.Tensor.
    batch = torch.as_tensor(np.asarray(row, dtype=np.float32)).unsqueeze(0).to(target_device)

    was_training = model.training
    # Evaluation mode: dropout is disabled and batch normalization uses its
    # running statistics (a batch of one sample cannot be normalized otherwise).
    model.eval()
    try:
        with torch.no_grad():
            yhat = model(batch)
    finally:
        model.train(was_training)

    # A CUDA tensor has to be copied to the host before the conversion to NumPy.
    return yhat.cpu().numpy()

## Saving model