In [1]:
import optuna
from optuna.samplers import TPESampler
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from pytorch_tabnet.tab_model import TabNetRegressor
import xgboost as xgb
import random
from sklearn.metrics import mean_squared_error, mean_absolute_error
#from joblib import dump

In [2]:
# CUDA
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DEVICE.type

'cuda'

In [3]:
# Fix random seed
seed = 42
torch.manual_seed(seed)
if DEVICE.type == 'cuda':
	torch.cuda.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
sampler = TPESampler(seed=seed)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Pre-processing input data

In [4]:
def bits_to_MiB(row):
	# verify if has string ' MiB'
	if 'MiB' in str(row):
		row = row.replace(' MiB', '')
		row = float(row)
	else:
		row = float(row) / np.power(2, 20)
	return row


def MHz_to_GHz(row):
	# verify if has string ' GHz'
	if 'GHz' in str(row):
		row = row.replace(' GHz', '')
		# convert to float
		row = float(row)
	else:
		row = row.replace(' MHz', '')
		row = float(row) / 1000
	return row

In [5]:
results_df = pd.read_csv('../results_new/execution_time.csv')
results_savio_df = pd.read_csv('../results_savio_new/execution_time.csv')
results_df = pd.concat([results_df, results_savio_df], ignore_index=True)
# preprocessing
results_df['total_cpu_usage'] = results_df['total_cpu_usage'].str.replace('%', '').astype(float) / 100
results_df['max_ram_usage'] = results_df['max_ram_usage'] / 1024
results_df['l2_cache_size'] = results_df['l2_cache_size'].apply(bits_to_MiB)
results_df['l3_cache_size'] = results_df['l3_cache_size'].apply(bits_to_MiB)
results_df['ghz_actual_friendly'] = results_df['hz_actual_friendly'].apply(MHz_to_GHz)
results_df['ghz_advertised_friendly'] = results_df['hz_advertised_friendly'].str.replace('GHz', '').astype(float)
results_df = results_df.drop(columns=['hz_actual_friendly', 'hz_advertised_friendly', 'arch', 'vendor_id_raw'])

In [6]:
# Make the target dataset
target_df = results_df[['total_time', 'brand_raw', 'count', 'l2_cache_size', 'l3_cache_size', 'l2_cache_line_size', 'l2_cache_associativity', 'ghz_advertised_friendly', 'benchmark']].copy()
# Rename columns to *_target
target_df = target_df.rename(columns={
    'total_time': 'total_time_target',
    'brand_raw': 'brand_raw_target',
    'count': 'count_target',
    'l2_cache_size': 'l2_cache_size_target',
    'l3_cache_size': 'l3_cache_size_target',
    'l2_cache_line_size': 'l2_cache_line_size_target',
    'l2_cache_associativity': 'l2_cache_associativity_target',
    'ghz_advertised_friendly': 'ghz_advertised_friendly_target',
})

dataset_df = pd.merge(results_df, target_df, how='inner', on='benchmark')
dataset_df = dataset_df[dataset_df['brand_raw'] != dataset_df['brand_raw_target']]
dataset_df.head(2)

Unnamed: 0,total_time,total_cpu_usage,max_ram_usage,brand_raw,count,l2_cache_size,l3_cache_size,l2_cache_line_size,l2_cache_associativity,benchmark,ghz_actual_friendly,ghz_advertised_friendly,total_time_target,brand_raw_target,count_target,l2_cache_size_target,l3_cache_size_target,l2_cache_line_size_target,l2_cache_associativity_target,ghz_advertised_friendly_target
5,13.47,0.99,1436.714844,Intel(R) Core(TM) i5-10400 CPU @ 2.90GHz,12,1.5,12.0,256,6,KNP,4.1729,2.9,45.91,13th Gen Intel(R) Core(TM) i5-1335U,12,7.5,12.0,1280,7,2.496
6,13.47,0.99,1436.714844,Intel(R) Core(TM) i5-10400 CPU @ 2.90GHz,12,1.5,12.0,256,6,KNP,4.1729,2.9,25.77,13th Gen Intel(R) Core(TM) i5-1335U,12,7.5,12.0,1280,7,2.496


In [7]:
# remove one computer for testing
g_train = dataset_df[(dataset_df['brand_raw'] != '13th Gen Intel(R) Core(TM) i5-1335U') & (dataset_df['brand_raw_target'] != '13th Gen Intel(R) Core(TM) i5-1335U')]
g_test = dataset_df[dataset_df['brand_raw_target'] == '13th Gen Intel(R) Core(TM) i5-1335U']

In [8]:
mm_df = dataset_df[dataset_df['benchmark'].isin(['MATRIX_MULT', 'MATRIX_MULT2', 'MATRIX_MULT3'])]
# remove one computer for testing
mm_train = mm_df[(mm_df['brand_raw'] != '13th Gen Intel(R) Core(TM) i5-1335U') & (mm_df['brand_raw_target'] != '13th Gen Intel(R) Core(TM) i5-1335U')]
mm_test = mm_df[mm_df['brand_raw_target'] == '13th Gen Intel(R) Core(TM) i5-1335U']

In [9]:
st_df = dataset_df[~dataset_df['benchmark'].isin(['MATRIX_MULT', 'MATRIX_MULT2', 'MATRIX_MULT3'])]
# remove one computer for testing
st_train = st_df[(st_df['brand_raw'] != '13th Gen Intel(R) Core(TM) i5-1335U') & (st_df['brand_raw_target'] != '13th Gen Intel(R) Core(TM) i5-1335U')]
st_test = st_df[st_df['brand_raw_target'] == '13th Gen Intel(R) Core(TM) i5-1335U']

In [10]:
# load test dataset
g_test = pd.read_csv('csv/g_test.csv')
st_test = pd.read_csv('csv/st_test.csv')
mm_test = pd.read_csv('csv/mm_test.csv')

In [11]:
target = 'total_time_target'
features = mm_test.columns.copy().drop(target).drop(['benchmark','brand_raw', 'brand_raw_target'])
features_st = features.copy().drop(['count', 'count_target'])

In [12]:
# general data
## split data
X_g_train = g_train[features]
y_g_train = g_train[target]

X_g_test = g_test[features]
y_g_test = g_test[target]

## normalize data
scaler_g = StandardScaler()
X_g_train = scaler_g.fit_transform(X_g_train)
X_g_test = scaler_g.transform(X_g_test)

## convert to tensor
X_g_train_t = torch.tensor(X_g_train, dtype=torch.float32).unsqueeze(1)
X_g_test_t = torch.tensor(X_g_test, dtype=torch.float32).unsqueeze(1)
y_g_train_t = torch.tensor(y_g_train.values, dtype=torch.float32).view(-1, 1)
y_g_test_t = torch.tensor(y_g_test.values, dtype=torch.float32).view(-1, 1)

In [13]:
# single thread data
## split data
X_st_train = st_train[features_st]
y_st_train = st_train[target]

X_st_test = st_test[features_st]
y_st_test = st_test[target]

## normalize data
scaler_st = StandardScaler()
X_st_train = scaler_st.fit_transform(X_st_train)
X_st_test = scaler_st.transform(X_st_test)

## convert to tensor
X_st_train_t = torch.tensor(X_st_train, dtype=torch.float32).unsqueeze(1)
X_st_test_t = torch.tensor(X_st_test, dtype=torch.float32).unsqueeze(1)
y_st_train_t = torch.tensor(y_st_train.values, dtype=torch.float32).view(-1, 1)
y_st_test_t = torch.tensor(y_st_test.values, dtype=torch.float32).view(-1, 1)

In [14]:
# multi thread data
## split data
X_mm_train = mm_train[features]
y_mm_train = mm_train[target]

X_mm_test = mm_test[features]
y_mm_test = mm_test[target]

## normalize data
scaler_mm = StandardScaler()
X_mm_train = scaler_mm.fit_transform(X_mm_train)
X_mm_test = scaler_mm.transform(X_mm_test)

## convert to tensor
X_mm_train_t = torch.tensor(X_mm_train, dtype=torch.float32).unsqueeze(1)
X_mm_test_t = torch.tensor(X_mm_test, dtype=torch.float32).unsqueeze(1)
y_mm_train_t = torch.tensor(y_mm_train.values, dtype=torch.float32).view(-1, 1)
y_mm_test_t = torch.tensor(y_mm_test.values, dtype=torch.float32).view(-1, 1)

In [15]:
if DEVICE.type == 'cuda':
	# move to DEVICE
	X_g_train_t = X_g_train_t.to(DEVICE)
	y_g_train_t = y_g_train_t.to(DEVICE)
	X_g_test_t = X_g_test_t.to(DEVICE)
	y_g_test_t = y_g_test_t.to(DEVICE)

	X_st_train_t = X_st_train_t.to(DEVICE)
	y_st_train_t = y_st_train_t.to(DEVICE)
	X_st_test_t = X_st_test_t.to(DEVICE)
	y_st_test_t = y_st_test_t.to(DEVICE)

	X_mm_train_t = X_mm_train_t.to(DEVICE)
	y_mm_train_t = y_mm_train_t.to(DEVICE)
	X_mm_test_t = X_mm_test_t.to(DEVICE)
	y_mm_test_t = y_mm_test_t.to(DEVICE)

# Model

In [20]:
class FeedforwardModel(nn.Module):
	def __init__(self, input_dim, output_dim, dropout=0.1):
		super(FeedforwardModel, self).__init__()
		# layers
		self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(32, output_dim)
        )
	
	def forward(self, x):
		return self.model(x).view(-1,1)

class TransformerModel(nn.Module):
	def __init__(self, input_dim, model_dim, num_heads, num_layers, output_dim, dropout=0.1):
		super(TransformerModel, self).__init__()
		# layers
		self.embedding = nn.Linear(input_dim, model_dim)
		encoder_layer = nn.TransformerEncoderLayer(d_model=model_dim, nhead=num_heads, batch_first=True)
		self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
		self.fc = nn.Linear(model_dim, output_dim)
		self.dropout = nn.Dropout(dropout)
	
	def forward(self, x):
		x = self.embedding(x)
		x = self.dropout(x)
		x = self.transformer(x)
		x = self.fc(x.mean(dim=1))
		return x
	
class EnsembleModel(nn.Module):
	def __init__(self, input_dim, model_dim, num_heads, num_layers, output_dim, type, dropout=0.1):
		super(EnsembleModel, self).__init__()
		self.model1 = FeedforwardModel(input_dim, output_dim, dropout)
		self.model2 = TransformerModel(input_dim, model_dim, num_heads, num_layers, output_dim, dropout)
		self.model3 = TabNetRegressor()
		self.model3.load_model(f'../models/tabnet/{type}.zip')
		self.model4 = xgb.XGBRegressor()
		self.model4.load_model(f'../models/xgboost/{type}.json')
		self.fc = nn.Linear(output_dim * 2 + 1 + 1, output_dim)
	
	def forward(self, x, x_t):
		out1 = self.model1(x_t)
		out2 = self.model2(x_t)
		out3 = self.model3.predict(x)
		out4 = self.model4.predict(x)

		out3 = torch.tensor(out3, dtype=torch.float32).view(-1, 1).to(x_t.device)
		out4 = torch.tensor(out4, dtype=torch.float32).view(-1, 1).to(x_t.device)

		out = torch.cat((out1, out2, out3, out4), dim=1)
		out = self.fc(out)
		return out

In [21]:
def objective(
		trial: optuna.Trial,
		X_train, X_test,
		X_train_t, y_train_t, X_test_t, y_test_t,
		input_dim, output_dim, type
	):
	# Definimos los hiperparámetros a buscar
	num_heads = trial.suggest_int('num_heads', 1, 8)
	model_dim = trial.suggest_int('model_dim', num_heads * 4, num_heads * 64, step=num_heads)
	num_layers = trial.suggest_int('num_layers', 1, 6)
	dropout = trial.suggest_float('dropout', 0.1, 0.5)
	learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
	weight_decay = trial.suggest_float('weight_decay', 1e-5, 1e-2, log=True)
	num_epochs = trial.suggest_int('num_epochs', 10, 100)

	# model initialization 
	model = EnsembleModel(input_dim, model_dim, num_heads, num_layers, output_dim, type, dropout)
	if DEVICE.type == 'cuda':
		model = model.to(DEVICE)
	criterion = nn.MSELoss()
	optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
	# training
	model.train()
	for epoch in range(num_epochs):
		optimizer.zero_grad()
		output = model(X_train, X_train_t)
		loss = criterion(output, y_train_t)
		loss.backward()
		optimizer.step()
	# evaluation
	model.eval()
	with torch.no_grad():
		predictions = model(X_test, X_test_t)
		val_loss = criterion(predictions, y_test_t)

		# trial.report(val_loss.item(), epoch+1)
		# if trial.should_prune():
		# 	raise optuna.TrialPruned()
	print(f"Trial: {trial.number} - Loss: {loss.item()} - Val Loss: {val_loss.item()}")
	return val_loss.item()

# Hyperparameters Optimization

In [22]:
n_trials = 25
study_g = None
study_st = None
study_mm = None

## General

In [23]:
# configuration optuna
study_g = optuna.create_study(direction='minimize', sampler=sampler)
study_g.optimize(lambda trial: objective(trial,
                                        X_g_train, X_g_test, 
                                        X_g_train_t, y_g_train_t, X_g_test_t, y_g_test_t,
                                        len(features), 1, 'general'), n_trials=n_trials)

[I 2024-07-03 10:03:45,302] A new study created in memory with name: no-name-fc23a59b-9e75-40e4-b0f1-25a40a19c2be
[I 2024-07-03 10:04:20,941] Trial 0 finished with value: 109.85990142822266 and parameters: {'num_heads': 7, 'model_dim': 280, 'num_layers': 5, 'dropout': 0.10823379771832098, 'learning_rate': 0.008123245085588688, 'weight_decay': 0.00314288089084011, 'num_epochs': 29}. Best is trial 0 with value: 109.85990142822266.


Trial: 0 - Loss: 120.29765319824219 - Val Loss: 109.85990142822266


[I 2024-07-03 10:04:46,208] Trial 1 finished with value: 459.22149658203125 and parameters: {'num_heads': 2, 'model_dim': 30, 'num_layers': 2, 'dropout': 0.3099025726528951, 'learning_rate': 0.00019762189340280086, 'weight_decay': 7.476312062252303e-05, 'num_epochs': 65}. Best is trial 0 with value: 109.85990142822266.


Trial: 1 - Loss: 444.8358154296875 - Val Loss: 459.22149658203125


[I 2024-07-03 10:05:10,622] Trial 2 finished with value: 115.1418685913086 and parameters: {'num_heads': 2, 'model_dim': 42, 'num_layers': 3, 'dropout': 0.28242799368681437, 'learning_rate': 0.0022673986523780395, 'weight_decay': 3.972110727381908e-05, 'num_epochs': 56}. Best is trial 0 with value: 109.85990142822266.


Trial: 2 - Loss: 158.81756591796875 - Val Loss: 115.1418685913086


[I 2024-07-03 10:06:17,991] Trial 3 finished with value: 499.9704895019531 and parameters: {'num_heads': 5, 'model_dim': 30, 'num_layers': 4, 'dropout': 0.16820964947491662, 'learning_rate': 1.5673095467235405e-05, 'weight_decay': 0.007025166339242158, 'num_epochs': 97}. Best is trial 0 with value: 109.85990142822266.


Trial: 3 - Loss: 478.4620666503906 - Val Loss: 499.9704895019531


[I 2024-07-03 10:06:41,584] Trial 4 finished with value: 373.4997253417969 and parameters: {'num_heads': 7, 'model_dim': 154, 'num_layers': 1, 'dropout': 0.3736932106048628, 'learning_rate': 0.00020914981329035596, 'weight_decay': 2.32335035153901e-05, 'num_epochs': 55}. Best is trial 0 with value: 109.85990142822266.


Trial: 4 - Loss: 387.8373107910156 - Val Loss: 373.4997253417969


[I 2024-07-03 10:07:01,964] Trial 5 finished with value: 456.6725769042969 and parameters: {'num_heads': 1, 'model_dim': 59, 'num_layers': 2, 'dropout': 0.36500891374159283, 'learning_rate': 8.612579192594876e-05, 'weight_decay': 0.00036324869566766035, 'num_epochs': 59}. Best is trial 0 with value: 109.85990142822266.


Trial: 5 - Loss: 446.53472900390625 - Val Loss: 456.6725769042969


[I 2024-07-03 10:07:58,894] Trial 6 finished with value: 225.4366912841797 and parameters: {'num_heads': 2, 'model_dim': 126, 'num_layers': 5, 'dropout': 0.4757995766256756, 'learning_rate': 0.004835952776465951, 'weight_decay': 0.0006218704727769079, 'num_epochs': 93}. Best is trial 0 with value: 109.85990142822266.


Trial: 6 - Loss: 38.048770904541016 - Val Loss: 225.4366912841797


[I 2024-07-03 10:08:24,711] Trial 7 finished with value: 483.0039367675781 and parameters: {'num_heads': 1, 'model_dim': 15, 'num_layers': 1, 'dropout': 0.23013213230530574, 'learning_rate': 0.00014656553886225324, 'weight_decay': 6.516990611177177e-05, 'num_epochs': 85}. Best is trial 0 with value: 109.85990142822266.


Trial: 7 - Loss: 462.137451171875 - Val Loss: 483.0039367675781


[I 2024-07-03 10:09:21,827] Trial 8 finished with value: 241.3771514892578 and parameters: {'num_heads': 3, 'model_dim': 63, 'num_layers': 4, 'dropout': 0.15636968998990508, 'learning_rate': 0.002550298070162891, 'weight_decay': 1.6736010167825783e-05, 'num_epochs': 99}. Best is trial 0 with value: 109.85990142822266.


Trial: 8 - Loss: 49.231842041015625 - Val Loss: 241.3771514892578


[I 2024-07-03 10:09:55,693] Trial 9 finished with value: 225.70355224609375 and parameters: {'num_heads': 7, 'model_dim': 112, 'num_layers': 1, 'dropout': 0.42618457138193366, 'learning_rate': 0.001319994226153501, 'weight_decay': 0.0015382308040279, 'num_epochs': 80}. Best is trial 0 with value: 109.85990142822266.


Trial: 9 - Loss: 159.583984375 - Val Loss: 225.70355224609375


[W 2024-07-03 10:09:56,620] Trial 10 failed with parameters: {'num_heads': 8, 'model_dim': 424, 'num_layers': 6, 'dropout': 0.10718475024592788, 'learning_rate': 0.008947143993486005, 'weight_decay': 0.005284384205738849, 'num_epochs': 19} because of the following error: OutOfMemoryError('CUDA out of memory. Tried to allocate 2.12 GiB. GPU 0 has a total capacity of 7.91 GiB of which 188.38 MiB is free. Process 102737 has 126.00 MiB memory in use. Process 72572 has 170.00 MiB memory in use. Process 74823 has 180.00 MiB memory in use. Including non-PyTorch memory, this process has 6.70 GiB memory in use. Of the allocated memory 5.21 GiB is allocated by PyTorch, and 1.36 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)').
Traceback (most recent call last)

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.12 GiB. GPU 0 has a total capacity of 7.91 GiB of which 188.38 MiB is free. Process 102737 has 126.00 MiB memory in use. Process 72572 has 170.00 MiB memory in use. Process 74823 has 180.00 MiB memory in use. Including non-PyTorch memory, this process has 6.70 GiB memory in use. Of the allocated memory 5.21 GiB is allocated by PyTorch, and 1.36 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

# Training

In [None]:
models_folder = '../models/ensemble/'
output_dim = 1

In [None]:
#dump(scaler_g, f'{models_folder}/scaler_g.joblib')
#dump(scaler_st, f'{models_folder}/scaler_st.joblib')
#dump(scaler_mm, f'{models_folder}/scaler_mm.joblib')

## General

In [None]:
# model initialization 
model_g = EnsembleModel(input_dim, model_dim, num_heads, num_layers, output_dim, type, dropout)
if DEVICE.type == 'cuda':
	model_g = model_g.to(DEVICE)
criterion = nn.MSELoss()
optimizer = optim.AdamW(model_g.parameters(), lr=learning_rate, weight_decay=weight_decay)
# training
model_g.train()
for epoch in range(num_epochs):
	optimizer.zero_grad()
	output = model_g(X_train, X_train_t)
	loss = criterion(output, y_train_t)
	loss.backward()
	optimizer.step()
	# validation
	if (epoch+1) % 10 == 0 or epoch == num_epochs-1:
		model_g.eval()
		with torch.no_grad():
			val_predictions = model_g(X_g_test, X_g_test_t)
			val_loss = criterion(val_predictions, y_g_test)
		print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}, Val Loss: {val_loss.item()}')
		model_g.train()

print(f"MSE: {mse} - RMSE: {np.sqrt(mse)} - MAE: {mean_absolute_error(y_g_test, preds)}")

Linear(in_features=16, out_features=64, bias=True)
torch.Size([900, 1, 16])
MSE: 97.95959151197678 - RMSE: 9.897453789332728 - MAE: 6.771173392910427


In [None]:
model_g.eval()
with torch.no_grad():
	preds = model_g(X_g_test_t).cpu().numpy().flatten()
mse = mean_squared_error(y_g_test.cpu().numpy().flatten(), preds)
print(f"MSE: {mse} - RMSE: {np.sqrt(mse)} - MAE: {mean_absolute_error(y_g_test.cpu().numpy().flatten(), preds)}")

In [None]:
# save model
torch.save(model_g, f'{models_folder}/general.pt')

## Single Thread

In [None]:
# general models
## TabNet
st_tabnet = TabNetRegressor()
st_tabnet.load_model(models_path['single_thread']['tabnet'])
## XGBoost
st_xgboost = xgb.XGBRegressor()
st_xgboost.load_model(models_path['single_thread']['xgboost'])



In [None]:
models_st = [
    torch.load(models_path["single_thread"]["feedforward"]).to(DEVICE),
    torch.load(models_path["single_thread"]["transformer"]).to(DEVICE),
    st_tabnet,
    st_xgboost
]

In [None]:
preds = ensemble_predict(models_st, X_st_test, X_st_test_t)
mse = mean_squared_error(y_st_test, preds)
print(f"MSE: {mse} - RMSE: {np.sqrt(mse)} - MAE: {mean_absolute_error(y_st_test, preds)}")

Linear(in_features=14, out_features=64, bias=True)
torch.Size([675, 1, 14])
MSE: 56.74999736758766 - RMSE: 7.533259411940336 - MAE: 4.2718147920961735


## Multi Thread

In [None]:
# general models
## TabNet
mm_tabnet = TabNetRegressor()
mm_tabnet.load_model(models_path['multi_thread']['tabnet'])
## XGBoost
mm_xgboost = xgb.XGBRegressor()
mm_xgboost.load_model(models_path['multi_thread']['xgboost'])



In [None]:
models_mm = [
    torch.load(models_path["multi_thread"]["feedforward"]).to(DEVICE),
    torch.load(models_path["multi_thread"]["transformer"]).to(DEVICE),
    mm_tabnet,
    mm_xgboost
]

In [None]:
preds = ensemble_predict(models_mm, X_mm_test, X_mm_test_t)
mse = mean_squared_error(y_mm_test, preds)
print(f"MSE: {mse} - RMSE: {np.sqrt(mse)} - MAE: {mean_absolute_error(y_mm_test, preds)}")

Linear(in_features=16, out_features=64, bias=True)
torch.Size([225, 1, 16])
MSE: 492.0993801494934 - RMSE: 22.183313101281634 - MAE: 22.010240815056694


# Load models

In [None]:
def describe_val(model, X, X_t, y):
	min_instance = {"prediction": float('inf'), "actual": 0, "index": 0}
	max_instance = {"prediction": 0, "actual": 0, "index": 0}
	
	predictions = ensemble_predict(model, X, X_t)
	index_min = np.argmin(np.abs(predictions - y))
	min_instance["prediction"] = predictions[index_min]
	min_instance["actual"] = y[index_min]
	min_instance["index"] = index_min
	index_max = np.argmax(np.abs(predictions - y))
	max_instance["prediction"] = predictions[index_max]
	max_instance["actual"] = y[index_max]
	max_instance["index"] = index_max

	return min_instance, max_instance, predictions

In [None]:
# general model
print("Validation set general model")
min_instance, max_instance, predictions = describe_val(models_g, X_g_test, X_g_test_t, y_g_test)
errors = np.abs(predictions - y_g_test)
mean_error = np.mean(errors)
std_error = np.std(errors)

print(f"Mean prediction: {np.mean(predictions)} | Std actual: {np.std(predictions)}")
print(f"Mean actual: {np.mean(y_g_test)} | Std actual: {np.std(y_g_test)}")
print(f"Mean Error: {mean_error} | Std Error: {std_error}")
print("---")
print("Min instance")
print(g_test.iloc[min_instance["index"]])
print(f"Min Prediction: {min_instance['prediction']} | Actual: {min_instance['actual']} | Error: {abs(min_instance['prediction'] - min_instance['actual'])}")
print("---")
print("Max instance")
print(g_test.iloc[max_instance["index"]])
print(f"Max Prediction: {max_instance['prediction']} | Actual: {max_instance['actual']} | Error: {abs(max_instance['prediction'] - max_instance['actual'])}")

Validation set general model
Linear(in_features=16, out_features=64, bias=True)
torch.Size([900, 1, 16])
Mean prediction: 23.126462936401367 | Std actual: 3.2905521392822266
Mean actual: 27.556500000000003 | Std actual: 7.4113266524961645
Mean Error: 6.771173392910427 | Std Error: 7.218781226434187
---
Min instance
total_time                                                            24.22
total_cpu_usage                                                        0.99
max_ram_usage                                                     25.316406
brand_raw                         Intel(R) Xeon(R) CPU E5-2670 v3 @ 2.30GHz
count                                                                    24
l2_cache_size                                                           6.0
l3_cache_size                                                          30.0
l2_cache_line_size                                                      256
l2_cache_associativity                                                    6

In [None]:
# single thread model
print("Validation set single thread model")
min_instance, max_instance, predictions = describe_val(models_st, X_st_test, X_st_test_t, y_st_test)
errors = np.abs(predictions - y_st_test)
mean_error = np.mean(errors)
std_error = np.std(errors)

print(f"Mean prediction: {np.mean(predictions)} | Std actual: {np.std(predictions)}")
print(f"Mean actual: {np.mean(y_st_test)} | Std actual: {np.std(y_st_test)}")
print(f"Mean Error: {mean_error} | Std Error: {std_error}")
print("Min instance")
print(st_test.iloc[min_instance["index"]])
print(f"Min Prediction: {min_instance['prediction']} | Actual: {min_instance['actual']} | Error: {abs(min_instance['prediction'] - min_instance['actual'])}")
print("---")
print("Max instance")
print(st_test.iloc[max_instance["index"]])
print(f"Max Prediction: {max_instance['prediction']} | Actual: {max_instance['actual']} | Error: {abs(max_instance['prediction'] - max_instance['actual'])}")

Validation set single thread model
Linear(in_features=14, out_features=64, bias=True)
torch.Size([675, 1, 14])
Mean prediction: 23.79733657836914 | Std actual: 3.99114727973938
Mean actual: 24.564000000000007 | Std actual: 6.096883138128859
Mean Error: 4.2718147920961735 | Std Error: 6.204965410831554
Min instance
total_time                                                           24.81
total_cpu_usage                                                        1.0
max_ram_usage                                                    31.253906
brand_raw                         Intel(R) Core(TM) i5-8300H CPU @ 2.30GHz
count                                                                    8
l2_cache_size                                                          1.0
l3_cache_size                                                          8.0
l2_cache_line_size                                                     256
l2_cache_associativity                                                   6
benchmark

In [None]:
# multi thread model
print("Validation set multi thread model")
min_instance, max_instance, predictions = describe_val(models_mm, X_mm_test, X_mm_test_t, y_mm_test)
errors = np.abs(predictions - y_mm_test)
mean_error = np.mean(errors)
std_error = np.std(errors)

print(f"Mean prediction: {np.mean(predictions)} | Std actual: {np.std(predictions)}")
print(f"Mean actual: {np.mean(y_mm_test)} | Std actual: {np.std(y_mm_test)}")
print(f"Mean Error: {mean_error} | Std Error: {std_error}")
print("Min instance")
print(mm_test.iloc[min_instance["index"]])
print(f"Min Prediction: {min_instance['prediction']} | Actual: {min_instance['actual']} | Error: {abs(min_instance['prediction'] - min_instance['actual'])}")
print("---")
print("Max instance")
print(mm_test.iloc[max_instance["index"]])
print(f"Max Prediction: {max_instance['prediction']} | Actual: {max_instance['actual']} | Error: {abs(max_instance['prediction'] - max_instance['actual'])}")

Validation set multi thread model
Linear(in_features=16, out_features=64, bias=True)
torch.Size([225, 1, 16])
Mean prediction: 14.523759841918945 | Std actual: 2.6294972896575928
Mean actual: 36.534 | Std actual: 0.8569854141115829
Mean Error: 22.010240815056694 | Std Error: 2.7656245972123217
Min instance
total_time                                                            26.65
total_cpu_usage                                                        1.92
max_ram_usage                                                   2333.460938
brand_raw                         Intel(R) Xeon(R) CPU E5-2623 v3 @ 3.00GHz
count                                                                     8
l2_cache_size                                                           2.0
l3_cache_size                                                          10.0
l2_cache_line_size                                                      256
l2_cache_associativity                                                    2
benchmar