In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from pytorch_tabnet.tab_model import TabNetRegressor
import xgboost as xgb
import random
from sklearn.metrics import mean_squared_error, mean_absolute_error
#from joblib import dump

In [2]:
# CUDA
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DEVICE.type

'cuda'

In [3]:
# Fix random seed
seed = 42
torch.manual_seed(seed)
if DEVICE.type == 'cuda':
	torch.cuda.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Pre-processing input data

In [4]:
def bits_to_MiB(row):
	# verify if has string ' MiB'
	if 'MiB' in str(row):
		row = row.replace(' MiB', '')
		row = float(row)
	else:
		row = float(row) / np.power(2, 20)
	return row


def MHz_to_GHz(row):
	# verify if has string ' GHz'
	if 'GHz' in str(row):
		row = row.replace(' GHz', '')
		# convert to float
		row = float(row)
	else:
		row = row.replace(' MHz', '')
		row = float(row) / 1000
	return row

In [5]:
results_df = pd.read_csv('../results_new/execution_time.csv')
results_savio_df = pd.read_csv('../results_savio_new/execution_time.csv')
results_df = pd.concat([results_df, results_savio_df], ignore_index=True)
# preprocessing
results_df['total_cpu_usage'] = results_df['total_cpu_usage'].str.replace('%', '').astype(float) / 100
results_df['max_ram_usage'] = results_df['max_ram_usage'] / 1024
results_df['l2_cache_size'] = results_df['l2_cache_size'].apply(bits_to_MiB)
results_df['l3_cache_size'] = results_df['l3_cache_size'].apply(bits_to_MiB)
results_df['ghz_actual_friendly'] = results_df['hz_actual_friendly'].apply(MHz_to_GHz)
results_df['ghz_advertised_friendly'] = results_df['hz_advertised_friendly'].str.replace('GHz', '').astype(float)
results_df = results_df.drop(columns=['hz_actual_friendly', 'hz_advertised_friendly', 'arch', 'vendor_id_raw'])

In [6]:
# Make the target dataset
target_df = results_df[['total_time', 'brand_raw', 'count', 'l2_cache_size', 'l3_cache_size', 'l2_cache_line_size', 'l2_cache_associativity', 'ghz_advertised_friendly', 'benchmark']].copy()
# Rename columns to *_target
target_df = target_df.rename(columns={
    'total_time': 'total_time_target',
    'brand_raw': 'brand_raw_target',
    'count': 'count_target',
    'l2_cache_size': 'l2_cache_size_target',
    'l3_cache_size': 'l3_cache_size_target',
    'l2_cache_line_size': 'l2_cache_line_size_target',
    'l2_cache_associativity': 'l2_cache_associativity_target',
    'ghz_advertised_friendly': 'ghz_advertised_friendly_target',
})

dataset_df = pd.merge(results_df, target_df, how='inner', on='benchmark')
dataset_df = dataset_df[dataset_df['brand_raw'] != dataset_df['brand_raw_target']]
dataset_df.head(2)

Unnamed: 0,total_time,total_cpu_usage,max_ram_usage,brand_raw,count,l2_cache_size,l3_cache_size,l2_cache_line_size,l2_cache_associativity,benchmark,ghz_actual_friendly,ghz_advertised_friendly,total_time_target,brand_raw_target,count_target,l2_cache_size_target,l3_cache_size_target,l2_cache_line_size_target,l2_cache_associativity_target,ghz_advertised_friendly_target
5,13.47,0.99,1436.714844,Intel(R) Core(TM) i5-10400 CPU @ 2.90GHz,12,1.5,12.0,256,6,KNP,4.1729,2.9,45.91,13th Gen Intel(R) Core(TM) i5-1335U,12,7.5,12.0,1280,7,2.496
6,13.47,0.99,1436.714844,Intel(R) Core(TM) i5-10400 CPU @ 2.90GHz,12,1.5,12.0,256,6,KNP,4.1729,2.9,25.77,13th Gen Intel(R) Core(TM) i5-1335U,12,7.5,12.0,1280,7,2.496


In [7]:
# remove one computer for testing
g_train = dataset_df[(dataset_df['brand_raw'] != '13th Gen Intel(R) Core(TM) i5-1335U') & (dataset_df['brand_raw_target'] != '13th Gen Intel(R) Core(TM) i5-1335U')]
g_test = dataset_df[dataset_df['brand_raw_target'] == '13th Gen Intel(R) Core(TM) i5-1335U']

In [8]:
mm_df = dataset_df[dataset_df['benchmark'].isin(['MATRIX_MULT', 'MATRIX_MULT2', 'MATRIX_MULT3'])]
# remove one computer for testing
mm_train = mm_df[(mm_df['brand_raw'] != '13th Gen Intel(R) Core(TM) i5-1335U') & (mm_df['brand_raw_target'] != '13th Gen Intel(R) Core(TM) i5-1335U')]
mm_test = mm_df[mm_df['brand_raw_target'] == '13th Gen Intel(R) Core(TM) i5-1335U']

In [9]:
st_df = dataset_df[~dataset_df['benchmark'].isin(['MATRIX_MULT', 'MATRIX_MULT2', 'MATRIX_MULT3'])]
# remove one computer for testing
st_train = st_df[(st_df['brand_raw'] != '13th Gen Intel(R) Core(TM) i5-1335U') & (st_df['brand_raw_target'] != '13th Gen Intel(R) Core(TM) i5-1335U')]
st_test = st_df[st_df['brand_raw_target'] == '13th Gen Intel(R) Core(TM) i5-1335U']

In [10]:
# load test dataset
# g_test = pd.read_csv('csv/g_test.csv')
# st_test = pd.read_csv('csv/st_test.csv')
# mm_test = pd.read_csv('csv/mm_test.csv')

In [11]:
target = 'total_time_target'
features = mm_test.columns.copy().drop(target).drop(['benchmark','brand_raw', 'brand_raw_target'])
features_st = features.copy().drop(['count', 'count_target'])

In [12]:
# general data
## split data
X_g_train = g_train[features]
y_g_train = g_train[target]

X_g_test = g_test[features]
y_g_test = g_test[target]

## normalize data
x_g_scaler = MinMaxScaler(feature_range=(0, 1))
X_g_train = x_g_scaler.fit_transform(X_g_train)
X_g_test = x_g_scaler.transform(X_g_test)
y_g_scaler = MinMaxScaler(feature_range=(0, 1))
y_g_train = y_g_scaler.fit_transform(y_g_train.values.reshape(-1, 1))
y_g_test = y_g_scaler.transform(y_g_test.values.reshape(-1, 1))

## convert to tensor
X_g_train_t = torch.tensor(X_g_train, dtype=torch.float32)
X_g_test_t = torch.tensor(X_g_test, dtype=torch.float32)
y_g_train_t = torch.tensor(y_g_train, dtype=torch.float32)
y_g_test_t = torch.tensor(y_g_test, dtype=torch.float32)

In [13]:
# single thread data
## split data
X_st_train = st_train[features_st]
y_st_train = st_train[target]

X_st_test = st_test[features_st]
y_st_test = st_test[target]

## normalize data
x_st_scaler = MinMaxScaler(feature_range=(0, 1))
X_st_train = x_st_scaler.fit_transform(X_st_train)
X_st_test = x_st_scaler.transform(X_st_test)
y_st_scaler = MinMaxScaler(feature_range=(0, 1))
y_st_train = y_st_scaler.fit_transform(y_st_train.values.reshape(-1, 1))
y_st_test = y_st_scaler.transform(y_st_test.values.reshape(-1, 1))

## convert to tensor
X_st_train_t = torch.tensor(X_st_train, dtype=torch.float32)
X_st_test_t = torch.tensor(X_st_test, dtype=torch.float32)
y_st_train_t = torch.tensor(y_st_train, dtype=torch.float32)
y_st_test_t = torch.tensor(y_st_test, dtype=torch.float32)

In [14]:
# multi thread data
## split data
X_mm_train = mm_train[features]
y_mm_train = mm_train[target]

X_mm_test = mm_test[features]
y_mm_test = mm_test[target]

## normalize data
x_mm_scaler = MinMaxScaler(feature_range=(0, 1))
X_mm_train = x_mm_scaler.fit_transform(X_mm_train)
X_mm_test = x_mm_scaler.transform(X_mm_test)
y_mm_scaler = MinMaxScaler(feature_range=(0, 1))
y_mm_train = y_mm_scaler.fit_transform(y_mm_train.values.reshape(-1, 1))
y_mm_test = y_mm_scaler.transform(y_mm_test.values.reshape(-1, 1))

## convert to tensor
X_mm_train_t = torch.tensor(X_mm_train, dtype=torch.float32)
X_mm_test_t = torch.tensor(X_mm_test, dtype=torch.float32)
y_mm_train_t = torch.tensor(y_mm_train, dtype=torch.float32)
y_mm_test_t = torch.tensor(y_mm_test, dtype=torch.float32)

In [15]:
if DEVICE.type == 'cuda':
	# move to DEVICE
	X_g_train_t = X_g_train_t.to(DEVICE)
	y_g_train_t = y_g_train_t.to(DEVICE)
	X_g_test_t = X_g_test_t.to(DEVICE)
	y_g_test_t = y_g_test_t.to(DEVICE)

	X_st_train_t = X_st_train_t.to(DEVICE)
	y_st_train_t = y_st_train_t.to(DEVICE)
	X_st_test_t = X_st_test_t.to(DEVICE)
	y_st_test_t = y_st_test_t.to(DEVICE)

	X_mm_train_t = X_mm_train_t.to(DEVICE)
	y_mm_train_t = y_mm_train_t.to(DEVICE)
	X_mm_test_t = X_mm_test_t.to(DEVICE)
	y_mm_test_t = y_mm_test_t.to(DEVICE)

# Model

In [16]:
class FeedforwardModel(nn.Module):
	def __init__(self, input_dim, output_dim, dropout=0.1):
		super(FeedforwardModel, self).__init__()
		# layers
		self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(32, output_dim),
			nn.ReLU()
        )
	
	def forward(self, x):
		return self.model(x)

class TransformerModel(nn.Module):
	def __init__(self, input_dim, model_dim, num_heads, num_layers, output_dim, dropout=0.1):
		super(TransformerModel, self).__init__()
		# layers
		self.embedding = nn.Linear(input_dim, model_dim)
		encoder_layer = nn.TransformerEncoderLayer(d_model=model_dim, nhead=num_heads, batch_first=True)
		self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
		self.fc = nn.Linear(model_dim, output_dim)
		self.dropout = nn.Dropout(dropout)
	
	def forward(self, x):
		x = self.embedding(x)
		x = self.dropout(x)
		x = self.transformer(x)
		x = self.fc(x.mean(dim=1))
		return x

In [17]:
def inv_scaling_pyt(y, y_scaler, d=False):
    if d:
        return y_scaler.inverse_transform(y.detach().cpu().numpy().reshape(-1, 1))
    return y_scaler.inverse_transform(y.cpu().numpy().reshape(-1, 1))

def inv_scaling(y, y_scaler):
    return y_scaler.inverse_transform(y.reshape(-1, 1))

In [18]:
models_folder = '../models/'
models_path = {
	'general': {
		'tabnet': models_folder + 'tabnet/' + 'general.zip',
		'transformer': models_folder + 'transformer/' + 'general.pt',
		'feedforward': models_folder + 'feedforward/' + 'general.pt',
		'xgboost': models_folder + 'xgboost/' + 'general.json'
	},
	'single_thread': {
		'tabnet': models_folder + 'tabnet/' + 'single_thread.zip',
		'transformer': models_folder + 'transformer/' + 'single_thread.pt',
		'feedforward': models_folder + 'feedforward/' + 'single_thread.pt',
		'xgboost': models_folder + 'xgboost/' + 'single_thread.json'
	},
	'multi_thread': {
		'tabnet': models_folder + 'tabnet/' + 'multi_thread.zip',
		'transformer': models_folder + 'transformer/' + 'multi_thread.pt',
		'feedforward': models_folder + 'feedforward/' + 'multi_thread.pt',
		'xgboost': models_folder + 'xgboost/' + 'multi_thread.json'
	}
}

In [19]:
def ensemble_predict(models, X, X_t, y_scaler):
	predictions = []
	with torch.no_grad():
		prediction = models[0](X_t)
		predictions.append(inv_scaling_pyt(prediction, y_scaler))
	prediction = models[2].predict(X)
	predictions.append(inv_scaling(prediction, y_scaler))
	prediction = models[3].predict(X)
	predictions.append(inv_scaling(prediction, y_scaler))
	avg_predictions = np.mean(predictions, axis=0)
	return avg_predictions

# Training

In [20]:
output_dim = 1

## General

In [21]:
# general models
## TabNet
g_tabnet = TabNetRegressor()
g_tabnet.load_model(models_path['general']['tabnet'])
## XGBoost
g_xgboost = xgb.XGBRegressor()
g_xgboost.load_model(models_path['general']['xgboost'])



In [22]:
models_g = [
    torch.load(models_path["general"]["feedforward"]).to(DEVICE),
    torch.load(models_path["general"]["transformer"]).to(DEVICE),
    g_tabnet,
    g_xgboost
]

In [23]:
preds = ensemble_predict(models_g, X_g_test, X_g_test_t, y_g_scaler)
y_scaled = inv_scaling(y_g_test, y_g_scaler)
mse = mean_squared_error(y_scaled, preds)
mae = mean_absolute_error(y_scaled, preds)
print(f"MSE: {mse} - RMSE: {np.sqrt(mse)} - MAE: {mae}")

MSE: 318.57197552742275 - RMSE: 17.848584692558195 - MAE: 13.308869249539763


## Single Thread

In [24]:
# general models
## TabNet
st_tabnet = TabNetRegressor()
st_tabnet.load_model(models_path['single_thread']['tabnet'])
## XGBoost
st_xgboost = xgb.XGBRegressor()
st_xgboost.load_model(models_path['single_thread']['xgboost'])



In [25]:
models_st = [
    torch.load(models_path["single_thread"]["feedforward"]).to(DEVICE),
    torch.load(models_path["single_thread"]["transformer"]).to(DEVICE),
    st_tabnet,
    st_xgboost
]

In [27]:
preds = ensemble_predict(models_st, X_st_test, X_st_test_t, y_st_scaler)
y_scaled = inv_scaling(y_st_test, y_st_scaler)
mse = mean_squared_error(y_scaled, preds)
mae = mean_absolute_error(y_scaled, preds)
print(f"MSE: {mse} - RMSE: {np.sqrt(mse)} - MAE: {mae}")

MSE: 250.89223979634184 - RMSE: 15.839578270785553 - MAE: 10.705123859011742


## Multi Thread

In [29]:
# general models
## TabNet
mm_tabnet = TabNetRegressor()
mm_tabnet.load_model(models_path['multi_thread']['tabnet'])
## XGBoost
mm_xgboost = xgb.XGBRegressor()
mm_xgboost.load_model(models_path['multi_thread']['xgboost'])



In [30]:
models_mm = [
    torch.load(models_path["multi_thread"]["feedforward"]).to(DEVICE),
    torch.load(models_path["multi_thread"]["transformer"]).to(DEVICE),
    mm_tabnet,
    mm_xgboost
]

In [31]:
preds = ensemble_predict(models_mm, X_mm_test, X_mm_test_t, y_mm_scaler)
y_scaled = inv_scaling(y_mm_test, y_mm_scaler)
mse = mean_squared_error(y_scaled, preds)
mae = mean_absolute_error(y_scaled, preds)
print(f"MSE: {mse} - RMSE: {np.sqrt(mse)} - MAE: {mae}")

MSE: 503.05628317175626 - RMSE: 22.428916228203185 - MAE: 22.395675151189163


# Load models

In [41]:
def describe_val(model, X, X_t, y, y_scaler):
	min_instance = {"prediction": float('inf'), "actual": 0, "index": 0}
	max_instance = {"prediction": 0, "actual": 0, "index": 0}
	
	predictions = ensemble_predict(model, X, X_t, y_scaler)
	y_scaled = inv_scaling(y, y_scaler)
	index_min = np.argmin(np.abs(predictions - y_scaled))
	min_instance["prediction"] = predictions[index_min].item()
	min_instance["actual"] = y_scaled[index_min].item()
	min_instance["index"] = index_min
	index_max = np.argmax(np.abs(predictions - y_scaled))
	max_instance["prediction"] = predictions[index_max].item()
	max_instance["actual"] = y_scaled[index_max].item()
	max_instance["index"] = index_max

	return min_instance, max_instance, predictions

In [42]:
# general model
print("Validation set general model")
min_instance, max_instance, predictions = describe_val(models_g, X_g_test, X_g_test_t, y_g_test, y_g_scaler)
y_scaled = inv_scaling(y_g_test, y_g_scaler)
errors = np.abs(predictions - y_scaled)
mean_error = np.mean(errors)
std_error = np.std(errors)

print(f"Mean prediction: {np.mean(predictions)} | Std actual: {np.std(predictions)}")
print(f"Mean actual: {np.mean(y_scaled)} | Std actual: {np.std(y_scaled)}")
print(f"Mean Error: {mean_error} | Std Error: {std_error}")
print("---")
print("Min instance")
print(g_test.iloc[min_instance["index"]])
print(f"Min Prediction: {min_instance['prediction']} | Actual: {min_instance['actual']} | Error: {abs(min_instance['prediction'] - min_instance['actual'])}")
print("---")
print("Max instance")
print(g_test.iloc[max_instance["index"]])
print(f"Max Prediction: {max_instance['prediction']} | Actual: {max_instance['actual']} | Error: {abs(max_instance['prediction'] - max_instance['actual'])}")

Validation set general model
Mean prediction: 22.28175163269043 | Std actual: 18.877511978149414
Mean actual: 35.041486486486484 | Std actual: 28.83706408578472
Mean Error: 13.308869249539763 | Std Error: 11.893106189136534
---
Min instance
total_time                                                       12.97
total_cpu_usage                                                   0.99
max_ram_usage                                              1436.332031
brand_raw                         12th Gen Intel(R) Core(TM) i5-12400F
count                                                               12
l2_cache_size                                                      7.5
l3_cache_size                                                     18.0
l2_cache_line_size                                                1280
l2_cache_associativity                                               7
benchmark                                                          KNP
ghz_actual_friendly                              

In [43]:
# single thread model
print("Validation set single thread model")
min_instance, max_instance, predictions = describe_val(models_st, X_st_test, X_st_test_t, y_st_test, y_st_scaler)
y_scaled = inv_scaling(y_st_test, y_st_scaler)
errors = np.abs(predictions - y_scaled)
mean_error = np.mean(errors)
std_error = np.std(errors)

print(f"Mean prediction: {np.mean(predictions)} | Std actual: {np.std(predictions)}")
print(f"Mean actual: {np.mean(y_scaled)} | Std actual: {np.std(y_scaled)}")
print(f"Mean Error: {mean_error} | Std Error: {std_error}")
print("---")
print("Min instance")
print(g_test.iloc[min_instance["index"]])
print(f"Min Prediction: {min_instance['prediction']} | Actual: {min_instance['actual']} | Error: {abs(min_instance['prediction'] - min_instance['actual'])}")
print("---")
print("Max instance")
print(g_test.iloc[max_instance["index"]])
print(f"Max Prediction: {max_instance['prediction']} | Actual: {max_instance['actual']} | Error: {abs(max_instance['prediction'] - max_instance['actual'])}")

Validation set single thread model
Mean prediction: 24.895668029785156 | Std actual: 20.056520462036133
Mean actual: 34.75261290322581 | Std actual: 31.49396398818138
Mean Error: 10.705123859011742 | Std Error: 11.674440584437413
---
Min instance
total_time                                                          107.06
total_cpu_usage                                                       0.99
max_ram_usage                                                       15.125
brand_raw                         Intel(R) Core(TM) i5-10400 CPU @ 2.90GHz
count                                                                   12
l2_cache_size                                                          1.5
l3_cache_size                                                         12.0
l2_cache_line_size                                                     256
l2_cache_associativity                                                   6
benchmark                                                        N_Queens2
ghz

In [44]:
# multi thread model
print("Validation set multi thread model")
min_instance, max_instance, predictions = describe_val(models_mm, X_mm_test, X_mm_test_t, y_mm_test, y_mm_scaler)
y_scaled = inv_scaling(y_mm_test, y_mm_scaler)
errors = np.abs(predictions - y_scaled)
mean_error = np.mean(errors)
std_error = np.std(errors)

print(f"Mean prediction: {np.mean(predictions)} | Std actual: {np.std(predictions)}")
print(f"Mean actual: {np.mean(y_scaled)} | Std actual: {np.std(y_scaled)}")
print(f"Mean Error: {mean_error} | Std Error: {std_error}")
print("---")
print("Min instance")
print(g_test.iloc[min_instance["index"]])
print(f"Min Prediction: {min_instance['prediction']} | Actual: {min_instance['actual']} | Error: {abs(min_instance['prediction'] - min_instance['actual'])}")
print("---")
print("Max instance")
print(g_test.iloc[max_instance["index"]])
print(f"Max Prediction: {max_instance['prediction']} | Actual: {max_instance['actual']} | Error: {abs(max_instance['prediction'] - max_instance['actual'])}")

Validation set multi thread model
Mean prediction: 14.138323783874512 | Std actual: 0.8692489266395569
Mean actual: 36.534 | Std actual: 0.8569854141115829
Mean Error: 22.395675151189163 | Std Error: 1.2206628093639538
---
Min instance
total_time                                                           17.33
total_cpu_usage                                                       0.99
max_ram_usage                                                       15.625
brand_raw                         Intel(R) Core(TM) i5-10400 CPU @ 2.90GHz
count                                                                   12
l2_cache_size                                                          1.5
l3_cache_size                                                         12.0
l2_cache_line_size                                                     256
l2_cache_associativity                                                   6
benchmark                                                         N_Queens
ghz_actual_fri