In [67]:
import optuna
from optuna.samplers import TPESampler
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import random
from sklearn.metrics import mean_squared_error, mean_absolute_error
#from joblib import dump

In [68]:
# CUDA
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DEVICE.type

'cuda'

In [69]:
# Fix random seed
seed = 42
torch.manual_seed(seed)
if DEVICE.type == 'cuda':
	torch.cuda.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
sampler = TPESampler(seed=seed)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Pre-processing input data

In [70]:
def bits_to_MiB(row):
	# verify if has string ' MiB'
	if 'MiB' in str(row):
		row = row.replace(' MiB', '')
		row = float(row)
	else:
		row = float(row) / np.power(2, 20)
	return row


def MHz_to_GHz(row):
	# verify if has string ' GHz'
	if 'GHz' in str(row):
		row = row.replace(' GHz', '')
		# convert to float
		row = float(row)
	else:
		row = row.replace(' MHz', '')
		row = float(row) / 1000
	return row

In [71]:
results_df = pd.read_csv('../results_new/execution_time.csv')
results_savio_df = pd.read_csv('../results_savio_new/execution_time.csv')
results_df = pd.concat([results_df, results_savio_df], ignore_index=True)
# preprocessing
results_df['total_cpu_usage'] = results_df['total_cpu_usage'].str.replace('%', '').astype(float) / 100
results_df['max_ram_usage'] = results_df['max_ram_usage'] / 1024
results_df['l2_cache_size'] = results_df['l2_cache_size'].apply(bits_to_MiB)
results_df['l3_cache_size'] = results_df['l3_cache_size'].apply(bits_to_MiB)
results_df['ghz_actual_friendly'] = results_df['hz_actual_friendly'].apply(MHz_to_GHz)
results_df['ghz_advertised_friendly'] = results_df['hz_advertised_friendly'].str.replace('GHz', '').astype(float)
results_df = results_df.drop(columns=['hz_actual_friendly', 'hz_advertised_friendly', 'arch', 'vendor_id_raw'])

In [72]:
# Make the target dataset
target_df = results_df[['total_time', 'brand_raw', 'count', 'l2_cache_size', 'l3_cache_size', 'l2_cache_line_size', 'l2_cache_associativity', 'ghz_advertised_friendly', 'benchmark']].copy()
# Rename columns to *_target
target_df = target_df.rename(columns={
    'total_time': 'total_time_target',
    'brand_raw': 'brand_raw_target',
    'count': 'count_target',
    'l2_cache_size': 'l2_cache_size_target',
    'l3_cache_size': 'l3_cache_size_target',
    'l2_cache_line_size': 'l2_cache_line_size_target',
    'l2_cache_associativity': 'l2_cache_associativity_target',
    'ghz_advertised_friendly': 'ghz_advertised_friendly_target',
})

dataset_df = pd.merge(results_df, target_df, how='inner', on='benchmark')
dataset_df = dataset_df[dataset_df['brand_raw'] != dataset_df['brand_raw_target']]
dataset_df.head(2)

Unnamed: 0,total_time,total_cpu_usage,max_ram_usage,brand_raw,count,l2_cache_size,l3_cache_size,l2_cache_line_size,l2_cache_associativity,benchmark,ghz_actual_friendly,ghz_advertised_friendly,total_time_target,brand_raw_target,count_target,l2_cache_size_target,l3_cache_size_target,l2_cache_line_size_target,l2_cache_associativity_target,ghz_advertised_friendly_target
5,13.47,0.99,1436.714844,Intel(R) Core(TM) i5-10400 CPU @ 2.90GHz,12,1.5,12.0,256,6,KNP,4.1729,2.9,45.91,13th Gen Intel(R) Core(TM) i5-1335U,12,7.5,12.0,1280,7,2.496
6,13.47,0.99,1436.714844,Intel(R) Core(TM) i5-10400 CPU @ 2.90GHz,12,1.5,12.0,256,6,KNP,4.1729,2.9,25.77,13th Gen Intel(R) Core(TM) i5-1335U,12,7.5,12.0,1280,7,2.496


In [73]:
# remove one computer for testing
g_train = dataset_df[(dataset_df['brand_raw'] != '13th Gen Intel(R) Core(TM) i5-1335U') & (dataset_df['brand_raw_target'] != '13th Gen Intel(R) Core(TM) i5-1335U')]
g_test = dataset_df[dataset_df['brand_raw_target'] == '13th Gen Intel(R) Core(TM) i5-1335U']

In [74]:
mm_df = dataset_df[dataset_df['benchmark'].isin(['MATRIX_MULT', 'MATRIX_MULT2', 'MATRIX_MULT3'])]
# remove one computer for testing
mm_train = mm_df[(mm_df['brand_raw'] != '13th Gen Intel(R) Core(TM) i5-1335U') & (mm_df['brand_raw_target'] != '13th Gen Intel(R) Core(TM) i5-1335U')]
mm_test = mm_df[mm_df['brand_raw_target'] == '13th Gen Intel(R) Core(TM) i5-1335U']

In [75]:
st_df = dataset_df[~dataset_df['benchmark'].isin(['MATRIX_MULT', 'MATRIX_MULT2', 'MATRIX_MULT3'])]
# remove one computer for testing
st_train = st_df[(st_df['brand_raw'] != '13th Gen Intel(R) Core(TM) i5-1335U') & (st_df['brand_raw_target'] != '13th Gen Intel(R) Core(TM) i5-1335U')]
st_test = st_df[st_df['brand_raw_target'] == '13th Gen Intel(R) Core(TM) i5-1335U']

In [76]:
# load test dataset
g_test = pd.read_csv('csv/g_test.csv')
st_test = pd.read_csv('csv/st_test.csv')
mm_test = pd.read_csv('csv/mm_test.csv')

In [77]:
target = 'total_time_target'
features = mm_test.columns.copy().drop(target).drop(['benchmark','brand_raw', 'brand_raw_target'])
features_st = features.copy().drop(['count', 'count_target'])

In [78]:
# general data
## split data
X_g_train = g_train[features]
y_g_train = g_train[target]

X_g_test = g_test[features]
y_g_test = g_test[target]

## normalize data
scaler_g = StandardScaler()
X_g_train = scaler_g.fit_transform(X_g_train)
X_g_test = scaler_g.transform(X_g_test)

## convert to tensor
X_g_train = torch.tensor(X_g_train, dtype=torch.float32).unsqueeze(1)
X_g_test = torch.tensor(X_g_test, dtype=torch.float32).unsqueeze(1)
y_g_train = torch.tensor(y_g_train.values, dtype=torch.float32).view(-1, 1)
y_g_test = torch.tensor(y_g_test.values, dtype=torch.float32).view(-1, 1)

In [79]:
# single thread data
## split data
X_st_train = st_train[features_st]
y_st_train = st_train[target]

X_st_test = st_test[features_st]
y_st_test = st_test[target]

## normalize data
scaler_st = StandardScaler()
X_st_train = scaler_st.fit_transform(X_st_train)
X_st_test = scaler_st.transform(X_st_test)

## convert to tensor
X_st_train = torch.tensor(X_st_train, dtype=torch.float32).unsqueeze(1)
X_st_test = torch.tensor(X_st_test, dtype=torch.float32).unsqueeze(1)
y_st_train = torch.tensor(y_st_train.values, dtype=torch.float32).view(-1, 1)
y_st_test = torch.tensor(y_st_test.values, dtype=torch.float32).view(-1, 1)

In [80]:
# multi thread data
## split data
X_mm_train = mm_train[features]
y_mm_train = mm_train[target]

X_mm_test = mm_test[features]
y_mm_test = mm_test[target]

## normalize data
scaler_mm = StandardScaler()
X_mm_train = scaler_mm.fit_transform(X_mm_train)
X_mm_test = scaler_mm.transform(X_mm_test)

## convert to tensor
X_mm_train = torch.tensor(X_mm_train, dtype=torch.float32).unsqueeze(1)
X_mm_test = torch.tensor(X_mm_test, dtype=torch.float32).unsqueeze(1)
y_mm_train = torch.tensor(y_mm_train.values, dtype=torch.float32).view(-1, 1)
y_mm_test = torch.tensor(y_mm_test.values, dtype=torch.float32).view(-1, 1)

In [81]:
if DEVICE.type == 'cuda':
	# move to DEVICE
	X_g_train = X_g_train.to(DEVICE)
	y_g_train = y_g_train.to(DEVICE)
	X_g_test = X_g_test.to(DEVICE)
	y_g_test = y_g_test.to(DEVICE)

	X_st_train = X_st_train.to(DEVICE)
	y_st_train = y_st_train.to(DEVICE)
	X_st_test = X_st_test.to(DEVICE)
	y_st_test = y_st_test.to(DEVICE)

	X_mm_train = X_mm_train.to(DEVICE)
	y_mm_train = y_mm_train.to(DEVICE)
	X_mm_test = X_mm_test.to(DEVICE)
	y_mm_test = y_mm_test.to(DEVICE)

# Model

In [82]:
class FeedforwardModel(nn.Module):
	def __init__(self, input_dim, output_dim, dropout=0.1):
		super(FeedforwardModel, self).__init__()
		# layers
		self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(32, output_dim)
        )
	
	def forward(self, x):
		return self.model(x).view(-1,1)

In [83]:
def objective(trial: optuna.Trial, X_train, y_train, X_test, y_test, input_dim, output_dim):
	# Definimos los hiperparámetros a buscar
	dropout = trial.suggest_float('dropout', 0.1, 0.5)
	learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
	weight_decay = trial.suggest_float('weight_decay', 1e-5, 1e-2, log=True)
	num_epochs = trial.suggest_int('num_epochs', 10, 100)

	# model initialization 
	model = FeedforwardModel(input_dim, output_dim, dropout)
	if DEVICE.type == 'cuda':
		model = model.to(DEVICE)
	criterion = nn.MSELoss()
	optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
	# training
	model.train()
	for epoch in range(num_epochs):
		optimizer.zero_grad()
		output = model(X_train)
		loss = criterion(output, y_train)
		loss.backward()
		optimizer.step()
	# evaluation
	model.eval()
	with torch.no_grad():
		predictions = model(X_test)
		val_loss = criterion(predictions, y_test)

		# trial.report(val_loss.item(), epoch+1)
		# if trial.should_prune():
		# 	raise optuna.TrialPruned()
	print(f"Trial: {trial.number} - Loss: {loss.item()} - Val Loss: {val_loss.item()}")
	return val_loss.item()

# Hyperparameters Optimization

In [84]:
n_trials = 25
study_g = None
study_st = None
study_mm = None

## General

In [85]:
# configuration optuna
study_g = optuna.create_study(direction='minimize', sampler=sampler)
study_g.optimize(lambda trial: objective(trial, X_g_train, y_g_train, X_g_test, y_g_test, len(features), 1), n_trials=n_trials)

[I 2024-07-03 09:32:17,045] A new study created in memory with name: no-name-253d6053-7505-499c-8198-2d368ab8e1a4
[I 2024-07-03 09:32:17,150] Trial 0 finished with value: 296.7495422363281 and parameters: {'dropout': 0.249816047538945, 'learning_rate': 0.0071144760093434225, 'weight_decay': 0.001570297088405539, 'num_epochs': 64}. Best is trial 0 with value: 296.7495422363281.
[I 2024-07-03 09:32:17,288] Trial 1 finished with value: 813.3651733398438 and parameters: {'dropout': 0.1624074561769746, 'learning_rate': 2.9375384576328295e-05, 'weight_decay': 1.493656855461762e-05, 'num_epochs': 88}. Best is trial 0 with value: 296.7495422363281.


Trial: 0 - Loss: 53.22903823852539 - Val Loss: 296.7495422363281
Trial: 1 - Loss: 747.9529418945312 - Val Loss: 813.3651733398438


[I 2024-07-03 09:32:17,475] Trial 2 finished with value: 154.66673278808594 and parameters: {'dropout': 0.34044600469728353, 'learning_rate': 0.001331121608073689, 'weight_decay': 1.1527987128232396e-05, 'num_epochs': 98}. Best is trial 2 with value: 154.66673278808594.
[I 2024-07-03 09:32:17,528] Trial 3 finished with value: 815.432861328125 and parameters: {'dropout': 0.4329770563201687, 'learning_rate': 4.335281794951564e-05, 'weight_decay': 3.511356313970405e-05, 'num_epochs': 26}. Best is trial 2 with value: 154.66673278808594.
[I 2024-07-03 09:32:17,602] Trial 4 finished with value: 782.8799438476562 and parameters: {'dropout': 0.2216968971838151, 'learning_rate': 0.00037520558551242813, 'weight_decay': 0.00019762189340280086, 'num_epochs': 36}. Best is trial 2 with value: 154.66673278808594.


Trial: 2 - Loss: 198.63522338867188 - Val Loss: 154.66673278808594
Trial: 3 - Loss: 748.2371215820312 - Val Loss: 815.432861328125
Trial: 4 - Loss: 726.1752319335938 - Val Loss: 782.8799438476562


[I 2024-07-03 09:32:17,726] Trial 5 finished with value: 812.0726318359375 and parameters: {'dropout': 0.34474115788895177, 'learning_rate': 2.621087878265438e-05, 'weight_decay': 7.52374288453485e-05, 'num_epochs': 43}. Best is trial 2 with value: 154.66673278808594.
[I 2024-07-03 09:32:17,814] Trial 6 finished with value: 144.5731201171875 and parameters: {'dropout': 0.28242799368681437, 'learning_rate': 0.0022673986523780395, 'weight_decay': 3.972110727381908e-05, 'num_epochs': 56}. Best is trial 6 with value: 144.5731201171875.
[I 2024-07-03 09:32:17,853] Trial 7 finished with value: 797.64501953125 and parameters: {'dropout': 0.33696582754481696, 'learning_rate': 1.3783237455007187e-05, 'weight_decay': 0.0006647135865318024, 'num_epochs': 25}. Best is trial 6 with value: 144.5731201171875.


Trial: 5 - Loss: 753.51904296875 - Val Loss: 812.0726318359375
Trial: 6 - Loss: 193.30221557617188 - Val Loss: 144.5731201171875
Trial: 7 - Loss: 740.494873046875 - Val Loss: 797.64501953125


[I 2024-07-03 09:32:17,964] Trial 8 finished with value: 307.912353515625 and parameters: {'dropout': 0.12602063719411183, 'learning_rate': 0.007025166339242158, 'weight_decay': 0.00788671412999049, 'num_epochs': 83}. Best is trial 6 with value: 144.5731201171875.
[I 2024-07-03 09:32:18,033] Trial 9 finished with value: 794.8218383789062 and parameters: {'dropout': 0.2218455076693483, 'learning_rate': 1.9634341572933304e-05, 'weight_decay': 0.0011290133559092666, 'num_epochs': 50}. Best is trial 6 with value: 144.5731201171875.
[I 2024-07-03 09:32:18,065] Trial 10 finished with value: 784.3370971679688 and parameters: {'dropout': 0.47805461769215474, 'learning_rate': 0.0013134119273039457, 'weight_decay': 0.00012769865356796517, 'num_epochs': 11}. Best is trial 6 with value: 144.5731201171875.


Trial: 8 - Loss: 37.99417495727539 - Val Loss: 307.912353515625
Trial: 9 - Loss: 737.200927734375 - Val Loss: 794.8218383789062
Trial: 10 - Loss: 735.0590209960938 - Val Loss: 784.3370971679688


[I 2024-07-03 09:32:18,175] Trial 11 finished with value: 226.7593231201172 and parameters: {'dropout': 0.3860678448787181, 'learning_rate': 0.0014909362373379702, 'weight_decay': 1.0603432178801931e-05, 'num_epochs': 68}. Best is trial 6 with value: 144.5731201171875.
[I 2024-07-03 09:32:18,322] Trial 12 finished with value: 163.3181610107422 and parameters: {'dropout': 0.2900569850269856, 'learning_rate': 0.0014110878546023105, 'weight_decay': 3.242310994927044e-05, 'num_epochs': 96}. Best is trial 6 with value: 144.5731201171875.


Trial: 11 - Loss: 326.3454284667969 - Val Loss: 226.7593231201172
Trial: 12 - Loss: 145.05467224121094 - Val Loss: 163.3181610107422


[I 2024-07-03 09:32:18,479] Trial 13 finished with value: 713.0650024414062 and parameters: {'dropout': 0.30171829521640287, 'learning_rate': 0.0003463773882261673, 'weight_decay': 3.713889621943575e-05, 'num_epochs': 75}. Best is trial 6 with value: 144.5731201171875.
[I 2024-07-03 09:32:18,598] Trial 14 finished with value: 144.3907470703125 and parameters: {'dropout': 0.38457602627124554, 'learning_rate': 0.0026316436234139966, 'weight_decay': 1.030523857157598e-05, 'num_epochs': 57}. Best is trial 14 with value: 144.3907470703125.


Trial: 13 - Loss: 687.477294921875 - Val Loss: 713.0650024414062
Trial: 14 - Loss: 197.13507080078125 - Val Loss: 144.3907470703125


[I 2024-07-03 09:32:18,693] Trial 15 finished with value: 139.8876495361328 and parameters: {'dropout': 0.416587732899981, 'learning_rate': 0.0030840943889292815, 'weight_decay': 8.084788474071657e-05, 'num_epochs': 56}. Best is trial 15 with value: 139.8876495361328.
[I 2024-07-03 09:32:18,807] Trial 16 finished with value: 272.3001708984375 and parameters: {'dropout': 0.49271790728496245, 'learning_rate': 0.0043692404557447535, 'weight_decay': 0.000355634629244513, 'num_epochs': 60}. Best is trial 15 with value: 139.8876495361328.
[I 2024-07-03 09:32:18,885] Trial 17 finished with value: 802.8743286132812 and parameters: {'dropout': 0.4387984493554663, 'learning_rate': 0.00012498387838491074, 'weight_decay': 7.878316398392372e-05, 'num_epochs': 47}. Best is trial 15 with value: 139.8876495361328.


Trial: 15 - Loss: 204.10658264160156 - Val Loss: 139.8876495361328
Trial: 16 - Loss: 104.1921615600586 - Val Loss: 272.3001708984375
Trial: 17 - Loss: 742.9776611328125 - Val Loss: 802.8743286132812


[I 2024-07-03 09:32:18,996] Trial 18 finished with value: 252.97035217285156 and parameters: {'dropout': 0.3927473884428378, 'learning_rate': 0.003571982848223593, 'weight_decay': 0.004106194081414188, 'num_epochs': 72}. Best is trial 15 with value: 139.8876495361328.
[I 2024-07-03 09:32:19,053] Trial 19 finished with value: 754.1903076171875 and parameters: {'dropout': 0.39193393181268055, 'learning_rate': 0.0006734669184463323, 'weight_decay': 0.00035468816593847116, 'num_epochs': 34}. Best is trial 15 with value: 139.8876495361328.
[I 2024-07-03 09:32:19,168] Trial 20 finished with value: 773.4823608398438 and parameters: {'dropout': 0.42172903848993215, 'learning_rate': 0.00017269608338265821, 'weight_decay': 2.0280552498073888e-05, 'num_epochs': 78}. Best is trial 15 with value: 139.8876495361328.


Trial: 18 - Loss: 79.70956420898438 - Val Loss: 252.97035217285156
Trial: 19 - Loss: 708.3399658203125 - Val Loss: 754.1903076171875
Trial: 20 - Loss: 720.910400390625 - Val Loss: 773.4823608398438


[I 2024-07-03 09:32:19,260] Trial 21 finished with value: 184.6476287841797 and parameters: {'dropout': 0.2889287846668806, 'learning_rate': 0.0033553436176327147, 'weight_decay': 6.514463518556296e-05, 'num_epochs': 56}. Best is trial 15 with value: 139.8876495361328.
[I 2024-07-03 09:32:19,346] Trial 22 finished with value: 153.8939208984375 and parameters: {'dropout': 0.3668796413511928, 'learning_rate': 0.002498505869197317, 'weight_decay': 2.8991999698009264e-05, 'num_epochs': 55}. Best is trial 15 with value: 139.8876495361328.
[I 2024-07-03 09:32:19,441] Trial 23 finished with value: 629.4324951171875 and parameters: {'dropout': 0.4632773078848643, 'learning_rate': 0.0007149563603451041, 'weight_decay': 0.00015438762316603123, 'num_epochs': 63}. Best is trial 15 with value: 139.8876495361328.


Trial: 21 - Loss: 136.29832458496094 - Val Loss: 184.6476287841797
Trial: 22 - Loss: 219.6670684814453 - Val Loss: 153.8939208984375
Trial: 23 - Loss: 619.0084228515625 - Val Loss: 629.4324951171875


[I 2024-07-03 09:32:19,519] Trial 24 finished with value: 251.90310668945312 and parameters: {'dropout': 0.31289521538268195, 'learning_rate': 0.009291628856246575, 'weight_decay': 5.673656109950271e-05, 'num_epochs': 42}. Best is trial 15 with value: 139.8876495361328.


Trial: 24 - Loss: 70.38103485107422 - Val Loss: 251.90310668945312


In [86]:
# Results
print(f'Número de pruebas: {len(study_g.trials)}')
trial = study_g.best_trial
print(f'Mejor prueba: {trial.number}')
print(f'Mejores parametros: {trial.params}')
print(f'Mejor valor de pérdida en validación: {trial.value}')

Número de pruebas: 25
Mejor prueba: 15
Mejores parametros: {'dropout': 0.416587732899981, 'learning_rate': 0.0030840943889292815, 'weight_decay': 8.084788474071657e-05, 'num_epochs': 56}
Mejor valor de pérdida en validación: 139.8876495361328


## Single Thread

In [87]:
# configuration optuna
study_st = optuna.create_study(direction='minimize', sampler=sampler)
study_st.optimize(lambda trial: objective(trial, X_st_train, y_st_train, X_st_test, y_st_test, len(features_st), 1), n_trials=n_trials)

[I 2024-07-03 09:32:19,542] A new study created in memory with name: no-name-52555f44-3590-4a66-b2ab-c40b1f1f96f6
[I 2024-07-03 09:32:19,666] Trial 0 finished with value: 555.745361328125 and parameters: {'dropout': 0.14881529393791154, 'learning_rate': 0.0003058656666978527, 'weight_decay': 1.2681352169084594e-05, 'num_epochs': 92}. Best is trial 0 with value: 555.745361328125.


Trial: 0 - Loss: 813.6718139648438 - Val Loss: 555.745361328125


[I 2024-07-03 09:32:19,743] Trial 1 finished with value: 372.0975036621094 and parameters: {'dropout': 0.20351199264000677, 'learning_rate': 0.0009717775305059633, 'weight_decay': 8.612579192594876e-05, 'num_epochs': 57}. Best is trial 1 with value: 372.0975036621094.


Trial: 1 - Loss: 652.382080078125 - Val Loss: 372.0975036621094


[I 2024-07-03 09:32:19,876] Trial 2 finished with value: 636.5726928710938 and parameters: {'dropout': 0.31868411173731187, 'learning_rate': 3.585612610345396e-05, 'weight_decay': 0.008105016126411584, 'num_epochs': 80}. Best is trial 1 with value: 372.0975036621094.


Trial: 2 - Loss: 867.8101806640625 - Val Loss: 636.5726928710938


[I 2024-07-03 09:32:20,012] Trial 3 finished with value: 202.14295959472656 and parameters: {'dropout': 0.4757995766256756, 'learning_rate': 0.004835952776465951, 'weight_decay': 0.0006218704727769079, 'num_epochs': 93}. Best is trial 3 with value: 202.14295959472656.
[I 2024-07-03 09:32:20,062] Trial 4 finished with value: 640.0482177734375 and parameters: {'dropout': 0.1353970008207678, 'learning_rate': 3.872118032174584e-05, 'weight_decay': 1.3667272915456215e-05, 'num_epochs': 39}. Best is trial 3 with value: 202.14295959472656.


Trial: 3 - Loss: 83.67191314697266 - Val Loss: 202.14295959472656
Trial: 4 - Loss: 864.1929321289062 - Val Loss: 640.0482177734375


[I 2024-07-03 09:32:20,123] Trial 5 finished with value: 620.8641357421875 and parameters: {'dropout': 0.25547091587579285, 'learning_rate': 6.516990611177177e-05, 'weight_decay': 0.003063462210622081, 'num_epochs': 42}. Best is trial 3 with value: 202.14295959472656.


Trial: 5 - Loss: 852.6903076171875 - Val Loss: 620.8641357421875


[I 2024-07-03 09:32:20,243] Trial 6 finished with value: 508.642578125 and parameters: {'dropout': 0.2123738038749523, 'learning_rate': 0.00042470585622618684, 'weight_decay': 2.6471141828218167e-05, 'num_epochs': 82}. Best is trial 3 with value: 202.14295959472656.
[I 2024-07-03 09:32:20,305] Trial 7 finished with value: 138.33213806152344 and parameters: {'dropout': 0.12982025747190834, 'learning_rate': 0.009133995846860976, 'weight_decay': 0.0020736445177905022, 'num_epochs': 28}. Best is trial 7 with value: 138.33213806152344.


Trial: 6 - Loss: 776.3043212890625 - Val Loss: 508.642578125
Trial: 7 - Loss: 85.29792785644531 - Val Loss: 138.33213806152344


[I 2024-07-03 09:32:20,441] Trial 8 finished with value: 137.92962646484375 and parameters: {'dropout': 0.10220884684944097, 'learning_rate': 0.0027950159165083337, 'weight_decay': 0.001319994226153501, 'num_epochs': 76}. Best is trial 8 with value: 137.92962646484375.


Trial: 8 - Loss: 88.39659881591797 - Val Loss: 137.92962646484375


[I 2024-07-03 09:32:20,467] Trial 9 finished with value: 635.9219970703125 and parameters: {'dropout': 0.4085081386743783, 'learning_rate': 1.667761543019792e-05, 'weight_decay': 0.0001189589673755355, 'num_epochs': 20}. Best is trial 8 with value: 137.92962646484375.
[I 2024-07-03 09:32:20,598] Trial 10 finished with value: 135.0571746826172 and parameters: {'dropout': 0.3392365094076081, 'learning_rate': 0.0015166810318657262, 'weight_decay': 0.0005795004515525634, 'num_epochs': 64}. Best is trial 10 with value: 135.0571746826172.


Trial: 9 - Loss: 860.1724853515625 - Val Loss: 635.9219970703125
Trial: 10 - Loss: 342.4717712402344 - Val Loss: 135.0571746826172


[I 2024-07-03 09:32:20,720] Trial 11 finished with value: 118.88919830322266 and parameters: {'dropout': 0.34358331215984184, 'learning_rate': 0.002107194199205492, 'weight_decay': 0.0006603362526769413, 'num_epochs': 67}. Best is trial 11 with value: 118.88919830322266.
[I 2024-07-03 09:32:20,821] Trial 12 finished with value: 263.7709655761719 and parameters: {'dropout': 0.35096259471514535, 'learning_rate': 0.001517031977234539, 'weight_decay': 0.00025020805936474996, 'num_epochs': 63}. Best is trial 11 with value: 118.88919830322266.
[I 2024-07-03 09:32:20,914] Trial 13 finished with value: 539.97607421875 and parameters: {'dropout': 0.39196113622823486, 'learning_rate': 0.00046434579912091106, 'weight_decay': 0.0005091868215676138, 'num_epochs': 62}. Best is trial 11 with value: 118.88919830322266.


Trial: 11 - Loss: 236.0889129638672 - Val Loss: 118.88919830322266
Trial: 12 - Loss: 464.82012939453125 - Val Loss: 263.7709655761719
Trial: 13 - Loss: 786.6491088867188 - Val Loss: 539.97607421875


[I 2024-07-03 09:32:20,996] Trial 14 finished with value: 325.2593994140625 and parameters: {'dropout': 0.28225357713904276, 'learning_rate': 0.0015118648819201833, 'weight_decay': 0.00016235205985611496, 'num_epochs': 45}. Best is trial 11 with value: 118.88919830322266.
[I 2024-07-03 09:32:21,111] Trial 15 finished with value: 615.0523071289062 and parameters: {'dropout': 0.45300687339442414, 'learning_rate': 0.0001630104176172594, 'weight_decay': 0.0008301975890770384, 'num_epochs': 70}. Best is trial 11 with value: 118.88919830322266.


Trial: 14 - Loss: 618.880126953125 - Val Loss: 325.2593994140625
Trial: 15 - Loss: 845.7090454101562 - Val Loss: 615.0523071289062


[I 2024-07-03 09:32:21,230] Trial 16 finished with value: 177.05230712890625 and parameters: {'dropout': 0.3578746766016598, 'learning_rate': 0.004041305326197806, 'weight_decay': 5.685538581694497e-05, 'num_epochs': 52}. Best is trial 11 with value: 118.88919830322266.
[I 2024-07-03 09:32:21,344] Trial 17 finished with value: 235.15203857421875 and parameters: {'dropout': 0.4165449480114406, 'learning_rate': 0.0009931360683066623, 'weight_decay': 0.007316083424947324, 'num_epochs': 70}. Best is trial 11 with value: 118.88919830322266.
[I 2024-07-03 09:32:21,398] Trial 18 finished with value: 286.0962829589844 and parameters: {'dropout': 0.31447155495373347, 'learning_rate': 0.0023494489390971486, 'weight_decay': 0.00035649849079328347, 'num_epochs': 31}. Best is trial 11 with value: 118.88919830322266.


Trial: 16 - Loss: 164.76148986816406 - Val Loss: 177.05230712890625
Trial: 17 - Loss: 552.55078125 - Val Loss: 235.15203857421875
Trial: 18 - Loss: 607.0214233398438 - Val Loss: 286.0962829589844


[I 2024-07-03 09:32:21,434] Trial 19 finished with value: 297.0113525390625 and parameters: {'dropout': 0.24229147272068757, 'learning_rate': 0.008788995867573658, 'weight_decay': 0.001413049023281875, 'num_epochs': 10}. Best is trial 11 with value: 118.88919830322266.
[I 2024-07-03 09:32:21,512] Trial 20 finished with value: 615.337646484375 and parameters: {'dropout': 0.3681261814839239, 'learning_rate': 0.00020927228599965385, 'weight_decay': 0.00333367175175128, 'num_epochs': 51}. Best is trial 11 with value: 118.88919830322266.
[I 2024-07-03 09:32:21,625] Trial 21 finished with value: 192.7498321533203 and parameters: {'dropout': 0.3042450613476117, 'learning_rate': 0.0035158568062257835, 'weight_decay': 0.0011008424962161236, 'num_epochs': 79}. Best is trial 11 with value: 118.88919830322266.


Trial: 19 - Loss: 615.6503295898438 - Val Loss: 297.0113525390625
Trial: 20 - Loss: 852.158203125 - Val Loss: 615.337646484375
Trial: 21 - Loss: 77.7697982788086 - Val Loss: 192.7498321533203


[I 2024-07-03 09:32:21,754] Trial 22 finished with value: 266.2166748046875 and parameters: {'dropout': 0.17540583322921238, 'learning_rate': 0.0008412697713067563, 'weight_decay': 0.0002867769035991269, 'num_epochs': 100}. Best is trial 11 with value: 118.88919830322266.
[I 2024-07-03 09:32:21,843] Trial 23 finished with value: 138.17356872558594 and parameters: {'dropout': 0.26583136129268875, 'learning_rate': 0.0023382453058034824, 'weight_decay': 0.0015062501276719216, 'num_epochs': 72}. Best is trial 11 with value: 118.88919830322266.
[I 2024-07-03 09:32:21,923] Trial 24 finished with value: 182.50863647460938 and parameters: {'dropout': 0.32712218850827757, 'learning_rate': 0.00590072050230189, 'weight_decay': 0.0005315810327925734, 'num_epochs': 63}. Best is trial 11 with value: 118.88919830322266.


Trial: 22 - Loss: 533.56787109375 - Val Loss: 266.2166748046875
Trial: 23 - Loss: 152.9484100341797 - Val Loss: 138.17356872558594
Trial: 24 - Loss: 64.49761199951172 - Val Loss: 182.50863647460938


In [88]:
# Results
print(f'Número de pruebas: {len(study_st.trials)}')
trial = study_st.best_trial
print(f'Mejor prueba: {trial.number}')
print(f'Mejores parametros: {trial.params}')
print(f'Mejor valor de pérdida en validación: {trial.value}')

Número de pruebas: 25
Mejor prueba: 11
Mejores parametros: {'dropout': 0.34358331215984184, 'learning_rate': 0.002107194199205492, 'weight_decay': 0.0006603362526769413, 'num_epochs': 67}
Mejor valor de pérdida en validación: 118.88919830322266


## Multi Thread

In [89]:
# configuration optuna
study_mm = optuna.create_study(direction='minimize', sampler=sampler)
study_mm.optimize(lambda trial: objective(trial, X_mm_train, y_mm_train, X_mm_test, y_mm_test, len(features), 1), n_trials=n_trials)

[I 2024-07-03 09:32:21,944] A new study created in memory with name: no-name-a3f182e7-b4b2-4ee5-966a-cb170e959986
[I 2024-07-03 09:32:21,971] Trial 0 finished with value: 1271.623046875 and parameters: {'dropout': 0.4452413703502375, 'learning_rate': 0.0007411299781083245, 'weight_decay': 9.833181933644887e-05, 'num_epochs': 15}. Best is trial 0 with value: 1271.623046875.
[I 2024-07-03 09:32:22,067] Trial 1 finished with value: 1337.6160888671875 and parameters: {'dropout': 0.2243929286862649, 'learning_rate': 9.452571391072311e-05, 'weight_decay': 0.0015446089075047066, 'num_epochs': 68}. Best is trial 0 with value: 1271.623046875.


Trial: 0 - Loss: 263.78118896484375 - Val Loss: 1271.623046875
Trial: 1 - Loss: 278.40606689453125 - Val Loss: 1337.6160888671875


[I 2024-07-03 09:32:22,210] Trial 2 finished with value: 1262.471435546875 and parameters: {'dropout': 0.4548850970305306, 'learning_rate': 0.00026100256506134784, 'weight_decay': 2.284455685002053e-05, 'num_epochs': 74}. Best is trial 2 with value: 1262.471435546875.
[I 2024-07-03 09:32:22,298] Trial 3 finished with value: 1122.4945068359375 and parameters: {'dropout': 0.40431401944675904, 'learning_rate': 0.00048287152161792117, 'weight_decay': 0.002055424552015075, 'num_epochs': 54}. Best is trial 3 with value: 1122.4945068359375.
[I 2024-07-03 09:32:22,324] Trial 4 finished with value: 1332.5672607421875 and parameters: {'dropout': 0.3090931317527976, 'learning_rate': 0.00019170041589170651, 'weight_decay': 1.1919481947918725e-05, 'num_epochs': 19}. Best is trial 3 with value: 1122.4945068359375.
[I 2024-07-03 09:32:22,402] Trial 5 finished with value: 1083.412841796875 and parameters: {'dropout': 0.11257167427469371, 'learning_rate': 0.0008113929572637835, 'weight_decay': 8.771380

Trial: 2 - Loss: 256.2961730957031 - Val Loss: 1262.471435546875
Trial: 3 - Loss: 222.63624572753906 - Val Loss: 1122.4945068359375
Trial: 4 - Loss: 277.1650695800781 - Val Loss: 1332.5672607421875
Trial: 5 - Loss: 199.33364868164062 - Val Loss: 1083.412841796875


[I 2024-07-03 09:32:22,504] Trial 6 finished with value: 1310.3779296875 and parameters: {'dropout': 0.4630265895704372, 'learning_rate': 5.59598687800608e-05, 'weight_decay': 0.0001702741688676439, 'num_epochs': 78}. Best is trial 5 with value: 1083.412841796875.
[I 2024-07-03 09:32:22,535] Trial 7 finished with value: 1337.34375 and parameters: {'dropout': 0.191519266196649, 'learning_rate': 1.7019223026554023e-05, 'weight_decay': 7.40038575908737e-05, 'num_epochs': 24}. Best is trial 5 with value: 1083.412841796875.
[I 2024-07-03 09:32:22,644] Trial 8 finished with value: 721.9096069335938 and parameters: {'dropout': 0.47187906093702925, 'learning_rate': 0.002656813924114493, 'weight_decay': 0.000794714742465374, 'num_epochs': 89}. Best is trial 8 with value: 721.9096069335938.


Trial: 6 - Loss: 269.3637390136719 - Val Loss: 1310.3779296875
Trial: 7 - Loss: 277.22418212890625 - Val Loss: 1337.34375
Trial: 8 - Loss: 33.35989761352539 - Val Loss: 721.9096069335938


[I 2024-07-03 09:32:22,713] Trial 9 finished with value: 1320.32470703125 and parameters: {'dropout': 0.4214688307596458, 'learning_rate': 3.6283583803549155e-05, 'weight_decay': 0.004760767751809498, 'num_epochs': 59}. Best is trial 8 with value: 721.9096069335938.
[I 2024-07-03 09:32:22,839] Trial 10 finished with value: 721.0247192382812 and parameters: {'dropout': 0.3016718825945298, 'learning_rate': 0.008267252353167429, 'weight_decay': 0.0006843584777543845, 'num_epochs': 100}. Best is trial 10 with value: 721.0247192382812.


Trial: 9 - Loss: 272.80224609375 - Val Loss: 1320.32470703125
Trial: 10 - Loss: 20.940799713134766 - Val Loss: 721.0247192382812


[I 2024-07-03 09:32:22,969] Trial 11 finished with value: 684.8622436523438 and parameters: {'dropout': 0.3254868023303965, 'learning_rate': 0.007424308162209415, 'weight_decay': 0.0005814221939072099, 'num_epochs': 100}. Best is trial 11 with value: 684.8622436523438.
[I 2024-07-03 09:32:23,091] Trial 12 finished with value: 735.5314331054688 and parameters: {'dropout': 0.3283405485921187, 'learning_rate': 0.009436348820661826, 'weight_decay': 0.0004992224422085127, 'num_epochs': 99}. Best is trial 11 with value: 684.8622436523438.


Trial: 11 - Loss: 21.416576385498047 - Val Loss: 684.8622436523438
Trial: 12 - Loss: 23.29034996032715 - Val Loss: 735.5314331054688


[I 2024-07-03 09:32:23,212] Trial 13 finished with value: 721.3323974609375 and parameters: {'dropout': 0.3627775662479934, 'learning_rate': 0.009531593881769825, 'weight_decay': 0.006661048385099137, 'num_epochs': 100}. Best is trial 11 with value: 684.8622436523438.
[I 2024-07-03 09:32:23,264] Trial 14 finished with value: 488.4656982421875 and parameters: {'dropout': 0.24852020936517055, 'learning_rate': 0.003211490726108927, 'weight_decay': 0.0003209795309976103, 'num_epochs': 37}. Best is trial 14 with value: 488.4656982421875.
[I 2024-07-03 09:32:23,318] Trial 15 finished with value: 668.3042602539062 and parameters: {'dropout': 0.22908265453989607, 'learning_rate': 0.0024966594970470893, 'weight_decay': 0.0002555286977638115, 'num_epochs': 36}. Best is trial 14 with value: 488.4656982421875.
[I 2024-07-03 09:32:23,367] Trial 16 finished with value: 937.6055908203125 and parameters: {'dropout': 0.23508880072524915, 'learning_rate': 0.0021038074659815, 'weight_decay': 0.0002494145

Trial: 13 - Loss: 26.361879348754883 - Val Loss: 721.3323974609375
Trial: 14 - Loss: 56.73611831665039 - Val Loss: 488.4656982421875
Trial: 15 - Loss: 107.42731475830078 - Val Loss: 668.3042602539062
Trial: 16 - Loss: 154.67919921875 - Val Loss: 937.6055908203125


[I 2024-07-03 09:32:23,428] Trial 17 finished with value: 524.52197265625 and parameters: {'dropout': 0.1532912859582702, 'learning_rate': 0.0025931986049053933, 'weight_decay': 4.4878433561040476e-05, 'num_epochs': 40}. Best is trial 14 with value: 488.4656982421875.
[I 2024-07-03 09:32:23,489] Trial 18 finished with value: 979.100341796875 and parameters: {'dropout': 0.12838509029822448, 'learning_rate': 0.0014552883974622373, 'weight_decay': 3.658994866331239e-05, 'num_epochs': 42}. Best is trial 14 with value: 488.4656982421875.
[I 2024-07-03 09:32:23,552] Trial 19 finished with value: 421.8748474121094 and parameters: {'dropout': 0.16494674469519044, 'learning_rate': 0.003216662064309068, 'weight_decay': 3.763118218173766e-05, 'num_epochs': 44}. Best is trial 19 with value: 421.8748474121094.
[I 2024-07-03 09:32:23,606] Trial 20 finished with value: 375.4826354980469 and parameters: {'dropout': 0.26513558297655493, 'learning_rate': 0.004216910458833383, 'weight_decay': 1.102237247

Trial: 17 - Loss: 66.75946044921875 - Val Loss: 524.52197265625
Trial: 18 - Loss: 175.5945587158203 - Val Loss: 979.100341796875
Trial: 19 - Loss: 57.650291442871094 - Val Loss: 421.8748474121094
Trial: 20 - Loss: 58.07162857055664 - Val Loss: 375.4826354980469


[I 2024-07-03 09:32:23,660] Trial 21 finished with value: 471.02081298828125 and parameters: {'dropout': 0.2657724769815869, 'learning_rate': 0.0045954676525575364, 'weight_decay': 1.1915260210248824e-05, 'num_epochs': 28}. Best is trial 20 with value: 375.4826354980469.
[I 2024-07-03 09:32:23,690] Trial 22 finished with value: 1128.2930908203125 and parameters: {'dropout': 0.27724116912608, 'learning_rate': 0.004059458224618988, 'weight_decay': 1.1281551503694828e-05, 'num_epochs': 10}. Best is trial 20 with value: 375.4826354980469.
[I 2024-07-03 09:32:23,738] Trial 23 finished with value: 1219.1251220703125 and parameters: {'dropout': 0.15780056270294396, 'learning_rate': 0.0011234352251962333, 'weight_decay': 2.234707836685487e-05, 'num_epochs': 26}. Best is trial 20 with value: 375.4826354980469.
[I 2024-07-03 09:32:23,808] Trial 24 finished with value: 726.0936279296875 and parameters: {'dropout': 0.1878191738011098, 'learning_rate': 0.005532909018446567, 'weight_decay': 1.904166

Trial: 21 - Loss: 58.00645065307617 - Val Loss: 471.02081298828125
Trial: 22 - Loss: 230.42709350585938 - Val Loss: 1128.2930908203125
Trial: 23 - Loss: 249.2744903564453 - Val Loss: 1219.1251220703125
Trial: 24 - Loss: 31.814455032348633 - Val Loss: 726.0936279296875


In [90]:
# Results
print(f'Trials quantity: {len(study_mm.trials)}')
trial = study_mm.best_trial
print(f'Mejor prueba: {trial.number}')
print(f'Mejores parametros: {trial.params}')
print(f'Mejor valor de pérdida en validación: {trial.value}')

Trials quantity: 25
Mejor prueba: 20
Mejores parametros: {'dropout': 0.26513558297655493, 'learning_rate': 0.004216910458833383, 'weight_decay': 1.1022372473788155e-05, 'num_epochs': 28}
Mejor valor de pérdida en validación: 375.4826354980469


# Training

In [91]:
models_folder = '../models/feedforward'
output_dim = 1

In [92]:
#dump(scaler_g, f'{models_folder}/scaler_g.joblib')
#dump(scaler_st, f'{models_folder}/scaler_st.joblib')
#dump(scaler_mm, f'{models_folder}/scaler_mm.joblib')

## General

In [93]:
input_dim = len(features)
# hyperparameters
if study_g is not None:
	dropout = study_g.best_trial.params['dropout']
	lr = study_g.best_trial.params['learning_rate']
	wd = study_g.best_trial.params['weight_decay']
	num_epochs = study_g.best_trial.params['num_epochs']
else:
	dropout = 0.28242799368681437
	lr = 0.0022673986523780395
	wd = 3.972110727381908e-05
	num_epochs = 56

In [94]:
# general model initialization
model_g = FeedforwardModel(input_dim, output_dim)
if DEVICE.type == 'cuda':
	model_g = model_g.to(DEVICE)
criterion_g = nn.MSELoss()
optimizer_g = optim.AdamW(model_g.parameters(), lr=lr, weight_decay=wd)

model_g.train()

for epoch in range(num_epochs):
	optimizer_g.zero_grad()
	output = model_g(X_g_train)
	loss = criterion_g(output, y_g_train)
	loss.backward()
	optimizer_g.step()
	# validation
	if (epoch+1) % 10 == 0 or epoch == num_epochs-1:
		model_g.eval()
		with torch.no_grad():
			val_predictions = model_g(X_g_test)
			val_loss = criterion_g(val_predictions, y_g_test)
		print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}, Val Loss: {val_loss.item()}')
		model_g.train()

Epoch 10/56, Loss: 712.6968383789062, Val Loss: 751.5004272460938
Epoch 20/56, Loss: 626.1070556640625, Val Loss: 630.6423950195312
Epoch 30/56, Loss: 461.3785400390625, Val Loss: 426.5197448730469
Epoch 40/56, Loss: 261.036865234375, Val Loss: 219.61253356933594
Epoch 50/56, Loss: 184.42160034179688, Val Loss: 187.02125549316406
Epoch 56/56, Loss: 161.03939819335938, Val Loss: 183.68875122070312


In [95]:
model_g.eval()
with torch.no_grad():
	preds = model_g(X_g_test).cpu().numpy().flatten()
mse = mean_squared_error(y_g_test.cpu().numpy().flatten(), preds)
print(f"MSE: {mse} - RMSE: {np.sqrt(mse)} - MAE: {mean_absolute_error(y_g_test.cpu().numpy().flatten(), preds)}")

MSE: 183.68875122070312 - RMSE: 13.553182601928711 - MAE: 9.794222831726074


In [96]:
# save model
torch.save(model_g, f'{models_folder}/general.pt')

## Single Thread

In [97]:
len(features_st)

14

In [98]:
input_dim = len(features_st)
# hyperparameters
if study_st is not None:
	dropout = study_st.best_trial.params['dropout']
	lr = study_st.best_trial.params['learning_rate']
	wd = study_st.best_trial.params['weight_decay']
	num_epochs = study_st.best_trial.params['num_epochs']
else:
	dropout = 0.3055256888961734
	lr = 0.0016955566008246274
	wd = 0.0004824955343940786
	num_epochs = 60

In [99]:
# single thread model initialization
model_st = FeedforwardModel(input_dim, output_dim)
if DEVICE.type == 'cuda':
	model_st = model_st.to(DEVICE)
criterion_st = nn.MSELoss()
optimizer_st = optim.AdamW(model_st.parameters(), lr=lr, weight_decay=wd)

model_st.train()

for epoch in range(num_epochs):
	optimizer_st.zero_grad()
	output = model_st(X_st_train)
	loss = criterion_st(output, y_st_train)
	loss.backward()
	optimizer_st.step()
	# validation
	if (epoch+1) % 10 == 0 or epoch == num_epochs-1:
		model_st.eval()
		with torch.no_grad():
			val_predictions = model_st(X_st_test)
			val_loss = criterion_st(val_predictions, y_st_test)
		print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}, Val Loss: {val_loss.item()}')
		model_st.train()

Epoch 10/67, Loss: 806.4037475585938, Val Loss: 557.7090454101562
Epoch 20/67, Loss: 723.0169677734375, Val Loss: 454.73748779296875
Epoch 30/67, Loss: 591.5701904296875, Val Loss: 312.69879150390625
Epoch 40/67, Loss: 418.4253845214844, Val Loss: 169.8199920654297
Epoch 50/67, Loss: 253.6962890625, Val Loss: 130.82382202148438
Epoch 60/67, Loss: 177.65286254882812, Val Loss: 210.48114013671875
Epoch 67/67, Loss: 157.81683349609375, Val Loss: 224.82217407226562


In [100]:
model_st.eval()
with torch.no_grad():
	preds = model_st(X_st_test).cpu().numpy().flatten()
mse = mean_squared_error(y_st_test.cpu().numpy().flatten(), preds)
print(f"MSE: {mse} - RMSE: {np.sqrt(mse)} - MAE: {mean_absolute_error(y_st_test.cpu().numpy().flatten(), preds)}")

MSE: 224.82215881347656 - RMSE: 14.994071006774902 - MAE: 9.085762023925781


In [101]:
# save model
torch.save(model_st, f'{models_folder}/single_thread.pt')

## Multi Thread

In [102]:
input_dim = len(features)
# hyperparameters
if study_st is not None:
	dropout = study_mm.best_trial.params['dropout']
	lr = study_mm.best_trial.params['learning_rate']
	wd = study_mm.best_trial.params['weight_decay']
	num_epochs = study_mm.best_trial.params['num_epochs']
else:
	dropout = 0.2341957682464934
	lr = 0.005214775254501407
	wd = 0.0028270900999294063
	num_epochs = 30

In [103]:
# multi thread model initialization
model_mm = FeedforwardModel(input_dim, output_dim)
if DEVICE.type == 'cuda':
	model_mm = model_mm.to(DEVICE)
criterion_mm = nn.MSELoss()
optimizer_mm = optim.AdamW(model_mm.parameters(), lr=lr, weight_decay=wd)

model_mm.train()

for epoch in range(num_epochs):
	optimizer_mm.zero_grad()
	output = model_mm(X_mm_train)
	loss = criterion_mm(output, y_mm_train)
	loss.backward()
	optimizer_mm.step()
	# validation
	if (epoch+1) % 10 == 0 or epoch == num_epochs-1:
		model_mm.eval()
		with torch.no_grad():
			val_predictions = model_mm(X_mm_test)
			val_loss = criterion_mm(val_predictions, y_mm_test)
		val_loss_final = val_loss.item()
		print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}, Val Loss: {val_loss.item()}')
		model_mm.train()

Epoch 10/28, Loss: 254.33627319335938, Val Loss: 1234.594482421875
Epoch 20/28, Loss: 179.36868286132812, Val Loss: 971.0455932617188
Epoch 28/28, Loss: 86.82102966308594, Val Loss: 632.05126953125


In [104]:
model_mm.eval()
with torch.no_grad():
	preds = model_mm(X_mm_test).cpu().numpy().flatten()
mse = mean_squared_error(y_mm_test.cpu().numpy().flatten(), preds)
print(f"MSE: {mse} - RMSE: {np.sqrt(mse)} - MAE: {mean_absolute_error(y_mm_test.cpu().numpy().flatten(), preds)}")

MSE: 632.05126953125 - RMSE: 25.1406307220459 - MAE: 24.924772262573242


In [105]:
# save model
torch.save(model_mm, f'{models_folder}/multi_thread.pt')

# Load models

In [106]:
model_g = torch.load(f'{models_folder}/general.pt').to(DEVICE)
model_st = torch.load(f'{models_folder}/single_thread.pt').to(DEVICE)
model_mm = torch.load(f'{models_folder}/multi_thread.pt').to(DEVICE)

In [107]:
def predict(model, X):
	model.eval()
	with torch.no_grad():
		prediction = model(X)
	return prediction

def describe_val(model, X, y):
	min_instance = {"prediction": float('inf'), "actual": 0, "index": 0}
	max_instance = {"prediction": 0, "actual": 0, "index": 0}
	
	predictions = predict(model, X).cpu().numpy().flatten()
	index_min = np.argmin(np.abs(predictions - y.cpu().numpy().flatten()))
	min_instance["prediction"] = predictions[index_min]
	min_instance["actual"] = y.cpu().numpy().flatten()[index_min]
	min_instance["index"] = index_min
	index_max = np.argmax(np.abs(predictions - y.cpu().numpy().flatten()))
	max_instance["prediction"] = predictions[index_max]
	max_instance["actual"] = y.cpu().numpy().flatten()[index_max]
	max_instance["index"] = index_max

	return min_instance, max_instance, predictions

In [108]:
# general model
print("Validation set general model")
min_instance, max_instance, predictions = describe_val(model_g, X_g_test, y_g_test)
errors = np.abs(predictions - y_g_test.cpu().numpy().flatten())
mean_error = np.mean(errors)
std_error = np.std(errors)

print(f"Mean prediction: {np.mean(predictions)} | Std actual: {np.std(predictions)}")
print(f"Mean actual: {np.mean(y_g_test.cpu().numpy().flatten())} | Std actual: {np.std(y_g_test.cpu().numpy().flatten())}")
print(f"Mean Error: {mean_error} | Std Error: {std_error}")
print("---")
print("Min instance")
print(g_test.iloc[min_instance["index"]])
print(f"Min Prediction: {min_instance['prediction']} | Actual: {min_instance['actual']} | Error: {abs(min_instance['prediction'] - min_instance['actual'])}")
print("---")
print("Max instance")
print(g_test.iloc[max_instance["index"]])
print(f"Max Prediction: {max_instance['prediction']} | Actual: {max_instance['actual']} | Error: {abs(max_instance['prediction'] - max_instance['actual'])}")

Validation set general model
Mean prediction: 26.374778747558594 | Std actual: 9.699380874633789
Mean actual: 27.556499481201172 | Std actual: 7.411326885223389
Mean Error: 9.794222831726074 | Std Error: 9.368133544921875
---
Min instance
total_time                                                           19.12
total_cpu_usage                                                       0.99
max_ram_usage                                                    25.320312
brand_raw                         Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz
count                                                                   40
l2_cache_size                                                         40.0
l3_cache_size                                                         27.5
l2_cache_line_size                                                     256
l2_cache_associativity                                                   6
benchmark                                                              TSP
ghz_actual_

In [109]:
# single thread model
print("Validation set single thread model")
min_instance, max_instance, predictions = describe_val(model_st, X_st_test, y_st_test)
errors = np.abs(predictions - y_st_test.cpu().numpy().flatten())
mean_error = np.mean(errors)
std_error = np.std(errors)

print(f"Mean prediction: {np.mean(predictions)} | Std actual: {np.std(predictions)}")
print(f"Mean actual: {np.mean(y_st_test.cpu().numpy().flatten())} | Std actual: {np.std(y_st_test.cpu().numpy().flatten())}")
print(f"Mean Error: {mean_error} | Std Error: {std_error}")
print("Min instance")
print(st_test.iloc[min_instance["index"]])
print(f"Min Prediction: {min_instance['prediction']} | Actual: {min_instance['actual']} | Error: {abs(min_instance['prediction'] - min_instance['actual'])}")
print("---")
print("Max instance")
print(st_test.iloc[max_instance["index"]])
print(f"Max Prediction: {max_instance['prediction']} | Actual: {max_instance['actual']} | Error: {abs(max_instance['prediction'] - max_instance['actual'])}")

Validation set single thread model
Mean prediction: 29.39603614807129 | Std actual: 12.513587951660156
Mean actual: 24.564001083374023 | Std actual: 6.096883296966553
Mean Error: 9.085762023925781 | Std Error: 11.92774486541748
Min instance
total_time                                                           16.03
total_cpu_usage                                                       0.99
max_ram_usage                                                  1432.585938
brand_raw                         Intel(R) Xeon(R) Gold 6230 CPU @ 2.10GHz
count                                                                   40
l2_cache_size                                                         40.0
l3_cache_size                                                         27.5
l2_cache_line_size                                                     256
l2_cache_associativity                                                   6
benchmark                                                              KNP
ghz_actua

In [110]:
# multi thread model
print("Validation set multi thread model")
min_instance, max_instance, predictions = describe_val(model_mm, X_mm_test, y_mm_test)
errors = np.abs(predictions - y_mm_test.cpu().numpy().flatten())
mean_error = np.mean(errors)
std_error = np.std(errors)

print(f"Mean prediction: {np.mean(predictions)} | Std actual: {np.std(predictions)}")
print(f"Mean actual: {np.mean(y_mm_test.cpu().numpy().flatten())} | Std actual: {np.std(y_mm_test.cpu().numpy().flatten())}")
print(f"Mean Error: {mean_error} | Std Error: {std_error}")
print("Min instance")
print(mm_test.iloc[min_instance["index"]])
print(f"Min Prediction: {min_instance['prediction']} | Actual: {min_instance['actual']} | Error: {abs(min_instance['prediction'] - min_instance['actual'])}")
print("---")
print("Max instance")
print(mm_test.iloc[max_instance["index"]])
print(f"Max Prediction: {max_instance['prediction']} | Actual: {max_instance['actual']} | Error: {abs(max_instance['prediction'] - max_instance['actual'])}")

Validation set multi thread model
Mean prediction: 11.609227180480957 | Std actual: 3.1737313270568848
Mean actual: 36.534000396728516 | Std actual: 0.8569851517677307
Mean Error: 24.924772262573242 | Std Error: 3.2873992919921875
Min instance
total_time                                                            26.66
total_cpu_usage                                                        1.92
max_ram_usage                                                   2332.976562
brand_raw                         Intel(R) Xeon(R) CPU E5-2623 v3 @ 3.00GHz
count                                                                     8
l2_cache_size                                                           2.0
l3_cache_size                                                          10.0
l2_cache_line_size                                                      256
l2_cache_associativity                                                    2
benchmark                                                       MATRIX_M