In [27]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from pytorch_tabnet.tab_model import TabNetRegressor
import xgboost as xgb
import random
from sklearn.metrics import mean_squared_error, mean_absolute_error
#from joblib import dump

In [28]:
# CUDA
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
	DEVICE = torch.device('cuda')
else:
	DEVICE = torch.device('cpu')
DEVICE.type

'cuda'

In [29]:
# Fix random seed
seed = 42
# Seed every RNG this notebook touches: Python, NumPy, then PyTorch (CPU and, on a GPU run, CUDA).
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if DEVICE.type == 'cuda':
	torch.cuda.manual_seed(seed)

# Request deterministic cuDNN kernels and switch off cuDNN benchmark mode.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Pre-processing input data

In [30]:
def bits_to_MiB(row):
	"""Convert one cache-size cell to a number of MiB.

	Accepted inputs:
	  * a string ending in ``KiB``, ``MiB`` or ``GiB`` (the space before the
	    unit is optional, surrounding whitespace is ignored);
	  * a bare number or numeric string, which is divided by 2**20.

	NOTE(review): despite the function name, the bare-number branch divides by
	2**20, i.e. it treats the value as a byte count -- confirm against the
	tool that produced the CSV.

	Args:
		row: a single cell value (string or number).

	Returns:
		The size in MiB as a float.

	Raises:
		ValueError: if the numeric part cannot be parsed as a float.
	"""
	text = str(row).strip()
	# Unit suffix -> factor that expresses the value in MiB.
	for suffix, to_mib in (('KiB', 1 / 1024), ('MiB', 1.0), ('GiB', 1024.0)):
		if text.endswith(suffix):
			return float(text[:-len(suffix)]) * to_mib
	# No recognised unit: keep the original raw-count conversion.
	return float(row) / np.power(2, 20)


def MHz_to_GHz(row):
	"""Convert one clock-frequency cell to a number of GHz.

	``'<x> GHz'`` yields ``x``; ``'<x> MHz'`` yields ``x / 1000``. The space
	before the unit is optional and surrounding whitespace is ignored. A value
	without a unit (including a plain number) is treated as MHz, which mirrors
	the original fall-through branch.

	Args:
		row: a single cell value (string or number).

	Returns:
		The frequency in GHz as a float.

	Raises:
		ValueError: if the numeric part cannot be parsed as a float.
	"""
	# Go through str() so numeric / NaN cells do not fail on a missing .replace().
	text = str(row).strip()
	if text.endswith('GHz'):
		return float(text[:-len('GHz')])
	if text.endswith('MHz'):
		text = text[:-len('MHz')]
	return float(text) / 1000

In [31]:
# Read both result files and stack them into one frame with a fresh 0..n-1 index.
results_savio_df = pd.read_csv('../results_savio_new/execution_time.csv')
results_df = pd.concat(
	[pd.read_csv('../results_new/execution_time.csv'), results_savio_df],
	ignore_index=True,
)
# preprocessing
results_df = (
	results_df
	.assign(
		# '97%' -> 0.97
		total_cpu_usage=lambda df: df['total_cpu_usage'].str.replace('%', '').astype(float) / 100,
		# Scaled down by 1024 (source unit is not visible in this notebook).
		max_ram_usage=lambda df: df['max_ram_usage'] / 1024,
		l2_cache_size=lambda df: df['l2_cache_size'].apply(bits_to_MiB),
		l3_cache_size=lambda df: df['l3_cache_size'].apply(bits_to_MiB),
		# The two ghz_* columns are new and are appended in this order.
		ghz_actual_friendly=lambda df: df['hz_actual_friendly'].apply(MHz_to_GHz),
		ghz_advertised_friendly=lambda df: df['hz_advertised_friendly'].str.replace('GHz', '').astype(float),
	)
	.drop(columns=['hz_actual_friendly', 'hz_advertised_friendly', 'arch', 'vendor_id_raw'])
)

In [32]:
# Make the target dataset
target_df = results_df[['total_time', 'brand_raw', 'count', 'l2_cache_size', 'l3_cache_size', 'l2_cache_line_size', 'l2_cache_associativity', 'ghz_advertised_friendly', 'benchmark']].copy()
# Rename columns to *_target ('benchmark' keeps its name because it is the join key)
target_df = target_df.rename(
	columns={col: f'{col}_target' for col in target_df.columns if col != 'benchmark'}
)

# Pair every run with every run of the same benchmark, then drop pairs whose
# source and target rows come from the same CPU model.
dataset_df = results_df.merge(target_df, how='inner', on='benchmark')
dataset_df = dataset_df.loc[dataset_df['brand_raw'].ne(dataset_df['brand_raw_target'])]
dataset_df.head(2)

Unnamed: 0,total_time,total_cpu_usage,max_ram_usage,brand_raw,count,l2_cache_size,l3_cache_size,l2_cache_line_size,l2_cache_associativity,benchmark,ghz_actual_friendly,ghz_advertised_friendly,total_time_target,brand_raw_target,count_target,l2_cache_size_target,l3_cache_size_target,l2_cache_line_size_target,l2_cache_associativity_target,ghz_advertised_friendly_target
5,13.47,0.99,1436.714844,Intel(R) Core(TM) i5-10400 CPU @ 2.90GHz,12,1.5,12.0,256,6,KNP,4.1729,2.9,45.91,13th Gen Intel(R) Core(TM) i5-1335U,12,7.5,12.0,1280,7,2.496
6,13.47,0.99,1436.714844,Intel(R) Core(TM) i5-10400 CPU @ 2.90GHz,12,1.5,12.0,256,6,KNP,4.1729,2.9,25.77,13th Gen Intel(R) Core(TM) i5-1335U,12,7.5,12.0,1280,7,2.496


In [33]:
# remove one computer for testing
# Test rows: pairs whose target machine is the held-out CPU.
# NOTE(review): g_test is reassigned from csv/g_test.csv in a later cell.
g_test = dataset_df.loc[dataset_df['brand_raw_target'].eq('13th Gen Intel(R) Core(TM) i5-1335U')]
# Train rows: pairs in which the held-out CPU appears on neither side.
g_train = dataset_df.loc[~dataset_df[['brand_raw', 'brand_raw_target']].eq('13th Gen Intel(R) Core(TM) i5-1335U').any(axis=1)]

In [34]:
# Subset restricted to the three matrix-multiplication benchmarks.
mm_df = dataset_df.loc[dataset_df['benchmark'].isin(['MATRIX_MULT', 'MATRIX_MULT2', 'MATRIX_MULT3'])]
# remove one computer for testing
# Test rows: pairs whose target machine is the held-out CPU.
# NOTE(review): mm_test is reassigned from csv/mm_test.csv in a later cell.
mm_test = mm_df.loc[mm_df['brand_raw_target'].eq('13th Gen Intel(R) Core(TM) i5-1335U')]
# Train rows: pairs in which the held-out CPU appears on neither side.
mm_train = mm_df.loc[~mm_df[['brand_raw', 'brand_raw_target']].eq('13th Gen Intel(R) Core(TM) i5-1335U').any(axis=1)]

In [35]:
# Subset with every benchmark except the three matrix-multiplication ones.
st_df = dataset_df.loc[~dataset_df['benchmark'].isin(['MATRIX_MULT', 'MATRIX_MULT2', 'MATRIX_MULT3'])]
# remove one computer for testing
# Test rows: pairs whose target machine is the held-out CPU.
# NOTE(review): st_test is reassigned from csv/st_test.csv in a later cell.
st_test = st_df.loc[st_df['brand_raw_target'].eq('13th Gen Intel(R) Core(TM) i5-1335U')]
# Train rows: pairs in which the held-out CPU appears on neither side.
st_train = st_df.loc[~st_df[['brand_raw', 'brand_raw_target']].eq('13th Gen Intel(R) Core(TM) i5-1335U').any(axis=1)]

In [36]:
# load test dataset
# These frames replace the g_test / st_test / mm_test computed from dataset_df above.
g_test, st_test, mm_test = (
	pd.read_csv(csv_path)
	for csv_path in ('csv/g_test.csv', 'csv/st_test.csv', 'csv/mm_test.csv')
)

In [37]:
target = 'total_time_target'
# Model inputs: every column of the test CSV except the label and the three text columns.
features = mm_test.columns.drop([target, 'benchmark', 'brand_raw', 'brand_raw_target'])
# The single-thread feature set additionally leaves out the two `count` columns.
features_st = features.drop(['count', 'count_target'])

In [38]:
# general data
## split data
y_g_train = g_train[target]
y_g_test = g_test[target]

## normalize data
# The scaler is fitted on the training rows only and re-used unchanged on the test rows.
scaler_g = StandardScaler().fit(g_train[features])
X_g_train = scaler_g.transform(g_train[features])
X_g_test = scaler_g.transform(g_test[features])

## convert to tensor
# Inputs gain a length-1 axis at position 1; targets become a single column.
X_g_train_t = torch.tensor(X_g_train, dtype=torch.float32)[:, None, :]
X_g_test_t = torch.tensor(X_g_test, dtype=torch.float32)[:, None, :]
y_g_train_t = torch.tensor(y_g_train.values, dtype=torch.float32).reshape(-1, 1)
y_g_test_t = torch.tensor(y_g_test.values, dtype=torch.float32).reshape(-1, 1)

In [39]:
# single thread data
## split data
y_st_train = st_train[target]
y_st_test = st_test[target]

## normalize data
# The scaler is fitted on the training rows only and re-used unchanged on the test rows.
scaler_st = StandardScaler().fit(st_train[features_st])
X_st_train = scaler_st.transform(st_train[features_st])
X_st_test = scaler_st.transform(st_test[features_st])

## convert to tensor
# Inputs gain a length-1 axis at position 1; targets become a single column.
X_st_train_t = torch.tensor(X_st_train, dtype=torch.float32)[:, None, :]
X_st_test_t = torch.tensor(X_st_test, dtype=torch.float32)[:, None, :]
y_st_train_t = torch.tensor(y_st_train.values, dtype=torch.float32).reshape(-1, 1)
y_st_test_t = torch.tensor(y_st_test.values, dtype=torch.float32).reshape(-1, 1)

In [40]:
# multi thread data
## split data
y_mm_train = mm_train[target]
y_mm_test = mm_test[target]

## normalize data
# The scaler is fitted on the training rows only and re-used unchanged on the test rows.
scaler_mm = StandardScaler().fit(mm_train[features])
X_mm_train = scaler_mm.transform(mm_train[features])
X_mm_test = scaler_mm.transform(mm_test[features])

## convert to tensor
# Inputs gain a length-1 axis at position 1; targets become a single column.
X_mm_train_t = torch.tensor(X_mm_train, dtype=torch.float32)[:, None, :]
X_mm_test_t = torch.tensor(X_mm_test, dtype=torch.float32)[:, None, :]
y_mm_train_t = torch.tensor(y_mm_train.values, dtype=torch.float32).reshape(-1, 1)
y_mm_test_t = torch.tensor(y_mm_test.values, dtype=torch.float32).reshape(-1, 1)

In [41]:
if DEVICE.type == 'cuda':
	# move to DEVICE
	# Each group is rebound in one statement; the right-hand side reads the old
	# tensors and yields their copies on DEVICE in the same order.
	X_g_train_t, y_g_train_t, X_g_test_t, y_g_test_t = (
		tensor.to(DEVICE) for tensor in (X_g_train_t, y_g_train_t, X_g_test_t, y_g_test_t)
	)

	X_st_train_t, y_st_train_t, X_st_test_t, y_st_test_t = (
		tensor.to(DEVICE) for tensor in (X_st_train_t, y_st_train_t, X_st_test_t, y_st_test_t)
	)

	X_mm_train_t, y_mm_train_t, X_mm_test_t, y_mm_test_t = (
		tensor.to(DEVICE) for tensor in (X_mm_train_t, y_mm_train_t, X_mm_test_t, y_mm_test_t)
	)

# Model

In [42]:
class FeedforwardModel(nn.Module):
	"""Fully connected regressor: input_dim -> 64 -> 32 -> output_dim.

	Each of the two hidden linear layers is followed by ReLU and dropout.
	"""

	def __init__(self, input_dim, output_dim, dropout=0.1):
		"""Build the layer stack.

		Args:
			input_dim: number of input features.
			output_dim: width of the final linear layer.
			dropout: drop probability used after each hidden activation.
		"""
		super().__init__()
		# layers
		# The attribute must stay named `model`: other cells read `.model[0]`,
		# and the parameter names derive from it.
		stack = []
		in_features = input_dim
		for width in (64, 32):
			stack += [nn.Linear(in_features, width), nn.ReLU(), nn.Dropout(p=dropout)]
			in_features = width
		stack.append(nn.Linear(in_features, output_dim))
		self.model = nn.Sequential(*stack)

	def forward(self, x):
		"""Apply the stack and reshape the result into a single column."""
		out = self.model(x)
		return out.view(-1, 1)

class TransformerModel(nn.Module):
	"""Transformer-encoder regressor.

	Pipeline: linear embedding -> dropout -> encoder stack -> mean over dim 1
	-> linear output head.
	"""

	def __init__(self, input_dim, model_dim, num_heads, num_layers, output_dim, dropout=0.1):
		"""Create the sub-modules.

		Args:
			input_dim: number of input features per position.
			model_dim: encoder width (``d_model``).
			num_heads: attention heads per encoder layer.
			num_layers: number of stacked encoder layers.
			output_dim: width of the output head.
			dropout: drop probability applied to the embedded input only; it is
				not forwarded to the encoder layers.
		"""
		super().__init__()
		# layers
		self.embedding = nn.Linear(input_dim, model_dim)
		# batch_first=True: inputs are laid out as (batch, seq, feature).
		self.transformer = nn.TransformerEncoder(
			nn.TransformerEncoderLayer(d_model=model_dim, nhead=num_heads, batch_first=True),
			num_layers=num_layers,
		)
		self.fc = nn.Linear(model_dim, output_dim)
		self.dropout = nn.Dropout(dropout)

	def forward(self, x):
		"""Embed, encode, average over dim 1 and project to ``output_dim``."""
		embedded = self.dropout(self.embedding(x))
		encoded = self.transformer(embedded)
		pooled = encoded.mean(dim=1)
		return self.fc(pooled)

In [43]:
models_folder = '../models/'
# One saved artefact per (dataset scope, model family): the family picks the
# sub-folder and extension, the scope picks the file name.
models_path = {
	scope: {
		'tabnet': f'{models_folder}tabnet/{scope}.zip',
		'transformer': f'{models_folder}transformer/{scope}.pt',
		'feedforward': f'{models_folder}feedforward/{scope}.pt',
		'xgboost': f'{models_folder}xgboost/{scope}.json',
	}
	for scope in ('general', 'single_thread', 'multi_thread')
}
# Scaler files use the short tags g / st / mm instead of the scope name.
scalers_path = {
	scope: f'{models_folder}scaler_{tag}.joblib'
	for scope, tag in (('general', 'g'), ('single_thread', 'st'), ('multi_thread', 'mm'))
}

In [44]:
def ensemble_predict(models, X, X_t, verbose=True):
	"""Average the predictions of three of the four models.

	Args:
		models: sequence laid out as [feedforward, transformer, tabnet, xgboost].
			Index 0 is called on ``X_t``; indices 2 and 3 are used through
			``.predict(X)``. Index 1 is not used here.
			NOTE(review): confirm that leaving the transformer out of the
			average is intentional.
		X: feature matrix passed to ``models[2].predict`` and ``models[3].predict``.
		X_t: tensor passed to ``models[0]``.
		verbose: when True (default, keeps the original output) print the first
			layer of ``models[0].model`` and the shape of ``X_t``.

	Returns:
		1-D NumPy array holding the element-wise mean of the three flattened
		prediction vectors.
	"""
	net = models[0]
	if verbose:
		print(net.model[0])
		print(X_t.shape)
	# Inference must run in eval mode: a model unpickled in training mode would
	# otherwise keep dropout active and return noisy, non-reproducible values.
	was_training = net.training
	net.eval()
	try:
		with torch.no_grad():
			predictions = [net(X_t).cpu().numpy().flatten()]
	finally:
		# Leave the caller's model in whatever mode it was handed over in.
		net.train(was_training)
	predictions.append(models[2].predict(X).flatten())
	predictions.append(models[3].predict(X).flatten())
	return np.mean(predictions, axis=0)

# Evaluation of pre-trained models

In [45]:
# Width of the model output: one value per row.
# NOTE(review): not referenced by any visible cell; presumably left over from model construction -- confirm.
output_dim = 1

In [46]:
#dump(scaler_g, f'{models_folder}/scaler_g.joblib')
#dump(scaler_st, f'{models_folder}/scaler_st.joblib')
#dump(scaler_mm, f'{models_folder}/scaler_mm.joblib')

## General

In [47]:
# general models
# Each wrapper is created empty and then filled in place from the saved file.
## TabNet
g_tabnet = TabNetRegressor()
g_tabnet.load_model(models_path['general']['tabnet'])
## XGBoost
g_xgboost = xgb.XGBRegressor()
g_xgboost.load_model(models_path['general']['xgboost'])



In [48]:
# Order matters: ensemble_predict indexes this list as
# [feedforward, transformer, tabnet, xgboost].
# map_location=DEVICE lets checkpoints that were saved on a GPU load on a
# CPU-only machine. torch.load unpickles arbitrary objects, so only point it
# at checkpoint files you trust.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects whole-model pickles -- confirm against the pinned version.
models_g = [
    torch.load(models_path["general"]["feedforward"], map_location=DEVICE).to(DEVICE),
    torch.load(models_path["general"]["transformer"], map_location=DEVICE).to(DEVICE),
    g_tabnet,
    g_xgboost
]

In [49]:
# Score the general ensemble on the held-out rows (scaled array for the
# tree/TabNet models, tensor for the network).
preds = ensemble_predict(models_g, X_g_test, X_g_test_t)
mse = mean_squared_error(y_g_test, preds)
print(f"MSE: {mse} - RMSE: {np.sqrt(mse)} - MAE: {mean_absolute_error(y_g_test, preds)}")

Linear(in_features=16, out_features=64, bias=True)
torch.Size([900, 1, 16])
MSE: 97.95959151197678 - RMSE: 9.897453789332728 - MAE: 6.771173392910427


## Single Thread

In [50]:
# single thread models
# Each wrapper is created empty and then filled in place from the saved file.
## TabNet
st_tabnet = TabNetRegressor()
st_tabnet.load_model(models_path['single_thread']['tabnet'])
## XGBoost
st_xgboost = xgb.XGBRegressor()
st_xgboost.load_model(models_path['single_thread']['xgboost'])



In [51]:
# Order matters: ensemble_predict indexes this list as
# [feedforward, transformer, tabnet, xgboost].
# map_location=DEVICE lets checkpoints that were saved on a GPU load on a
# CPU-only machine. torch.load unpickles arbitrary objects, so only point it
# at checkpoint files you trust.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects whole-model pickles -- confirm against the pinned version.
models_st = [
    torch.load(models_path["single_thread"]["feedforward"], map_location=DEVICE).to(DEVICE),
    torch.load(models_path["single_thread"]["transformer"], map_location=DEVICE).to(DEVICE),
    st_tabnet,
    st_xgboost
]

In [52]:
# Score the single-thread ensemble on the held-out rows.
# `preds` and `mse` are re-used names: they now refer to the single-thread run.
preds = ensemble_predict(models_st, X_st_test, X_st_test_t)
mse = mean_squared_error(y_st_test, preds)
print(f"MSE: {mse} - RMSE: {np.sqrt(mse)} - MAE: {mean_absolute_error(y_st_test, preds)}")

Linear(in_features=14, out_features=64, bias=True)
torch.Size([675, 1, 14])
MSE: 56.74999736758766 - RMSE: 7.533259411940336 - MAE: 4.2718147920961735


## Multi Thread

In [53]:
# multi thread models
# Each wrapper is created empty and then filled in place from the saved file.
## TabNet
mm_tabnet = TabNetRegressor()
mm_tabnet.load_model(models_path['multi_thread']['tabnet'])
## XGBoost
mm_xgboost = xgb.XGBRegressor()
mm_xgboost.load_model(models_path['multi_thread']['xgboost'])



In [54]:
# Order matters: ensemble_predict indexes this list as
# [feedforward, transformer, tabnet, xgboost].
# map_location=DEVICE lets checkpoints that were saved on a GPU load on a
# CPU-only machine. torch.load unpickles arbitrary objects, so only point it
# at checkpoint files you trust.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects whole-model pickles -- confirm against the pinned version.
models_mm = [
    torch.load(models_path["multi_thread"]["feedforward"], map_location=DEVICE).to(DEVICE),
    torch.load(models_path["multi_thread"]["transformer"], map_location=DEVICE).to(DEVICE),
    mm_tabnet,
    mm_xgboost
]

In [55]:
# Score the multi-thread ensemble on the held-out rows.
# `preds` and `mse` are re-used names: they now refer to the multi-thread run.
# NOTE(review): in the recorded run MAE (~22.0) is almost equal to RMSE (~22.2),
# and the report cell further down shows mean prediction ~14.5 against mean
# actual ~36.5, so the error looks like a near-constant under-prediction --
# worth investigating.
preds = ensemble_predict(models_mm, X_mm_test, X_mm_test_t)
mse = mean_squared_error(y_mm_test, preds)
print(f"MSE: {mse} - RMSE: {np.sqrt(mse)} - MAE: {mean_absolute_error(y_mm_test, preds)}")

Linear(in_features=16, out_features=64, bias=True)
torch.Size([225, 1, 16])
MSE: 492.0993801494934 - RMSE: 22.183313101281634 - MAE: 22.010240815056694


# Error analysis

In [56]:
def describe_val(model, X, X_t, y):
	"""Locate the best- and worst-predicted rows of an evaluation set.

	Args:
		model: list of models, forwarded unchanged to ``ensemble_predict``.
		X: feature matrix, forwarded to ``ensemble_predict``.
		X_t: feature tensor, forwarded to ``ensemble_predict``.
		y: true target values, one per row (pandas Series, array or list).

	Returns:
		``(min_instance, max_instance, predictions)``. Each instance is a dict
		with keys ``"prediction"``, ``"actual"`` and ``"index"``; ``"index"`` is
		the row *position* (suitable for ``.iloc``), not an index label.
	"""
	predictions = ensemble_predict(model, X, X_t)
	# Work on a plain array: argmin/argmax return positions, while `y[pos]` on a
	# pandas Series is a label lookup and picks the wrong row (or raises) when
	# the Series index is not 0..n-1.
	actual = np.asarray(y).reshape(-1)
	abs_errors = np.abs(predictions - actual)

	def _instance(position):
		# Bundle what the report cells print for one row.
		return {
			"prediction": predictions[position],
			"actual": actual[position],
			"index": position,
		}

	min_instance = _instance(np.argmin(abs_errors))
	max_instance = _instance(np.argmax(abs_errors))
	return min_instance, max_instance, predictions

In [57]:
# general model
print("Validation set general model")
min_instance, max_instance, predictions = describe_val(models_g, X_g_test, X_g_test_t, y_g_test)
# Absolute error per row; the summary below reports its mean and spread.
errors = np.abs(predictions - y_g_test)
mean_error = np.mean(errors)
std_error = np.std(errors)

# Label fixed: this line reports the spread of the predictions, not of the actual values.
print(f"Mean prediction: {np.mean(predictions)} | Std prediction: {np.std(predictions)}")
print(f"Mean actual: {np.mean(y_g_test)} | Std actual: {np.std(y_g_test)}")
print(f"Mean Error: {mean_error} | Std Error: {std_error}")
print("---")
print("Min instance")
# "index" is a row position, hence .iloc.
print(g_test.iloc[min_instance["index"]])
print(f"Min Prediction: {min_instance['prediction']} | Actual: {min_instance['actual']} | Error: {abs(min_instance['prediction'] - min_instance['actual'])}")
print("---")
print("Max instance")
print(g_test.iloc[max_instance["index"]])
print(f"Max Prediction: {max_instance['prediction']} | Actual: {max_instance['actual']} | Error: {abs(max_instance['prediction'] - max_instance['actual'])}")

Validation set general model
Linear(in_features=16, out_features=64, bias=True)
torch.Size([900, 1, 16])
Mean prediction: 23.126462936401367 | Std actual: 3.2905521392822266
Mean actual: 27.556500000000003 | Std actual: 7.4113266524961645
Mean Error: 6.771173392910427 | Std Error: 7.218781226434187
---
Min instance
total_time                                                            24.22
total_cpu_usage                                                        0.99
max_ram_usage                                                     25.316406
brand_raw                         Intel(R) Xeon(R) CPU E5-2670 v3 @ 2.30GHz
count                                                                    24
l2_cache_size                                                           6.0
l3_cache_size                                                          30.0
l2_cache_line_size                                                      256
l2_cache_associativity                                                    6

In [58]:
# single thread model
print("Validation set single thread model")
min_instance, max_instance, predictions = describe_val(models_st, X_st_test, X_st_test_t, y_st_test)
# Absolute error per row; the summary below reports its mean and spread.
errors = np.abs(predictions - y_st_test)
mean_error = np.mean(errors)
std_error = np.std(errors)

# Label fixed: this line reports the spread of the predictions, not of the actual values.
print(f"Mean prediction: {np.mean(predictions)} | Std prediction: {np.std(predictions)}")
print(f"Mean actual: {np.mean(y_st_test)} | Std actual: {np.std(y_st_test)}")
print(f"Mean Error: {mean_error} | Std Error: {std_error}")
# Separator added so this report has the same layout as the general-model report.
print("---")
print("Min instance")
# "index" is a row position, hence .iloc.
print(st_test.iloc[min_instance["index"]])
print(f"Min Prediction: {min_instance['prediction']} | Actual: {min_instance['actual']} | Error: {abs(min_instance['prediction'] - min_instance['actual'])}")
print("---")
print("Max instance")
print(st_test.iloc[max_instance["index"]])
print(f"Max Prediction: {max_instance['prediction']} | Actual: {max_instance['actual']} | Error: {abs(max_instance['prediction'] - max_instance['actual'])}")

Validation set single thread model
Linear(in_features=14, out_features=64, bias=True)
torch.Size([675, 1, 14])
Mean prediction: 23.79733657836914 | Std actual: 3.99114727973938
Mean actual: 24.564000000000007 | Std actual: 6.096883138128859
Mean Error: 4.2718147920961735 | Std Error: 6.204965410831554
Min instance
total_time                                                           24.81
total_cpu_usage                                                        1.0
max_ram_usage                                                    31.253906
brand_raw                         Intel(R) Core(TM) i5-8300H CPU @ 2.30GHz
count                                                                    8
l2_cache_size                                                          1.0
l3_cache_size                                                          8.0
l2_cache_line_size                                                     256
l2_cache_associativity                                                   6
benchmark

In [59]:
# multi thread model
print("Validation set multi thread model")
min_instance, max_instance, predictions = describe_val(models_mm, X_mm_test, X_mm_test_t, y_mm_test)
# Absolute error per row; the summary below reports its mean and spread.
errors = np.abs(predictions - y_mm_test)
mean_error = np.mean(errors)
std_error = np.std(errors)

# Label fixed: this line reports the spread of the predictions, not of the actual values.
print(f"Mean prediction: {np.mean(predictions)} | Std prediction: {np.std(predictions)}")
print(f"Mean actual: {np.mean(y_mm_test)} | Std actual: {np.std(y_mm_test)}")
print(f"Mean Error: {mean_error} | Std Error: {std_error}")
# Separator added so this report has the same layout as the general-model report.
print("---")
print("Min instance")
# "index" is a row position, hence .iloc.
print(mm_test.iloc[min_instance["index"]])
print(f"Min Prediction: {min_instance['prediction']} | Actual: {min_instance['actual']} | Error: {abs(min_instance['prediction'] - min_instance['actual'])}")
print("---")
print("Max instance")
print(mm_test.iloc[max_instance["index"]])
print(f"Max Prediction: {max_instance['prediction']} | Actual: {max_instance['actual']} | Error: {abs(max_instance['prediction'] - max_instance['actual'])}")

Validation set multi thread model
Linear(in_features=16, out_features=64, bias=True)
torch.Size([225, 1, 16])
Mean prediction: 14.523759841918945 | Std actual: 2.6294972896575928
Mean actual: 36.534 | Std actual: 0.8569854141115829
Mean Error: 22.010240815056694 | Std Error: 2.7656245972123217
Min instance
total_time                                                            26.65
total_cpu_usage                                                        1.92
max_ram_usage                                                   2333.460938
brand_raw                         Intel(R) Xeon(R) CPU E5-2623 v3 @ 3.00GHz
count                                                                     8
l2_cache_size                                                           2.0
l3_cache_size                                                          10.0
l2_cache_line_size                                                      256
l2_cache_associativity                                                    2
benchmar