In [1]:
import os
import sys

sys.path.append('../../txf_design-space/')
sys.path.append('../../txf_design-space/flexibert')
sys.path.append('../../boshnas/boshnas/')
sys.path.append('../utils')
sys.path.append('../')

import yaml
import json
import time
import torch
import shlex
import shutil
import argparse
import subprocess
import collections
import numpy as np
from tqdm import tqdm

from embeddings.utils import graph_util, print_util as pu

sys.path.append('../../txf_design-space/transformers/src/transformers')
import embedding_util, energy_util

from boshnas import BOSHNAS
from acq import gosh_acq as acq

from transformers import BertModel
from transformers import RobertaTokenizer, RobertaModel
from transformers.models.bert.configuration_bert import BertConfig
from transformers.models.bert.modeling_modular_bert import BertModelModular, BertForMaskedLMModular, BertForSequenceClassificationModular

import warnings
warnings.filterwarnings("ignore")

from run_energy_profiler import *

In [2]:
# Load design space
design_space = yaml.safe_load(open('../design_space/design_space.yaml'))

# Load dataset
dataset = json.load(open('../dataset/dataset.json'))
for key in dataset.keys():
    dataset[key]['embedding'] = eval(dataset[key]['embedding'])
    
X, latency, energy, peak_power = convert_to_tabular(dataset)
max_latency, max_energy, max_peak_power = np.amax(latency), np.amax(energy), np.amax(peak_power)

mean_X = np.mean(X, axis=0)

# Increase maximum values
max_latency, max_energy, max_peak_power = 1.2 * max_latency, 1.2 * max_energy, 1.2 * max_peak_power
print(f'Max latency: {max_latency : 0.3f}s/seq. Max energy: {max_energy : 0.3f}J/seq. Max peak power: {max_peak_power : 0.3f}W')
    
# Get the embedding for model with hash: 233489f38df291297bee3ba92de24c06
model_hash = '233489f38df291297bee3ba92de24c06'
print(np.array(dataset[model_hash]['embedding']))

# Get initialization parameters
embedding_dim = len(dataset[list(dataset.keys())[0]]['embedding'])
embedding_bounds = embedding_util.get_embedding_bounds(design_space, 'all')

max_X = np.array([bound[1] for bound in embedding_bounds])

embedding_bounds = (np.array([bound[0] for bound in embedding_bounds]), np.array([bound[1] for bound in embedding_bounds]))

normalized_embedding_bounds = \
    (np.array([0 for i in range(embedding_dim)]), np.array([1 for i in range(embedding_dim)]))    

surrogate_models = \
    init_surrogate_models(embedding_dim, embedding_bounds, '../dataset/surrogate_models/', True)

X_ds = convert_to_tabular({model_hash: dataset[model_hash]}, only_embeddings=True)

# We see a non-zero epistemic uncertainty
get_predictions(surrogate_models, X_ds-mean_X)

Evaluated model in tabular dataset: 233489f38df291297bee3ba92de24c06
Evaluated model in tabular dataset: c5093d6937238d9cd014503cae891bc7
Evaluated model in tabular dataset: 6749823236fe693ccee504e359937975
Evaluated model in tabular dataset: a3f5d9305cc6a5346b6c0501057edd84
Evaluated model in tabular dataset: 25e3e9a5d81e2a16f1ecd44a6a8ceefa
Evaluated model in tabular dataset: 9f910f6a26e43d5189ce063b86a41446
Evaluated model in tabular dataset: 64f935f62eca9a1f8a42fbe62a17ddb3
Evaluated model in tabular dataset: 65a5e226d7f55d289e135de50997307e
Evaluated model in tabular dataset: 85946dcff3ff8f9c4d5446edf5f1d94e
Evaluated model in tabular dataset: 6f22fa64b794599aa9b298a701e85d01
Evaluated model in tabular dataset: 5e5bc47d7e79e7a6b9c1106769be0dba
Evaluated model in tabular dataset: 4549dacaf6b46bbe0933c931705a8940
Evaluated model in tabular dataset: 580d4988ac728197889a329f16ae060b
Evaluated model in tabular dataset: 3d5c66def824c314a602b7423a5f4903
Evaluated model in tabular dataset

([(tensor([0.]), (tensor([1.]), 0))],
 [(tensor([0.9998]), (tensor([0.0003]), 0))],
 [(tensor([1.]), (tensor([1.]), 0))],
 tensor([2.0003]),
 0)

In [3]:
get_predictions(surrogate_models, (X-mean_X)/max_X)

([(tensor([0.5864]), (tensor([0.5780]), 0)),
  (tensor([0.5845]), (tensor([0.5760]), 0)),
  (tensor([0.5921]), (tensor([0.5802]), 0)),
  (tensor([0.5904]), (tensor([0.5779]), 0)),
  (tensor([0.5851]), (tensor([0.5775]), 0)),
  (tensor([0.5894]), (tensor([0.5811]), 0)),
  (tensor([0.5873]), (tensor([0.5749]), 0)),
  (tensor([0.5817]), (tensor([0.5772]), 0)),
  (tensor([0.5884]), (tensor([0.5722]), 0)),
  (tensor([0.5807]), (tensor([0.5745]), 0)),
  (tensor([0.5843]), (tensor([0.5707]), 0)),
  (tensor([0.5852]), (tensor([0.5775]), 0)),
  (tensor([0.5843]), (tensor([0.5737]), 0)),
  (tensor([0.5832]), (tensor([0.5757]), 0)),
  (tensor([0.5843]), (tensor([0.5773]), 0)),
  (tensor([0.5869]), (tensor([0.5772]), 0)),
  (tensor([0.5847]), (tensor([0.5777]), 0))],
 [(tensor([0.4938]), (tensor([0.4330]), 0)),
  (tensor([0.4937]), (tensor([0.4276]), 0)),
  (tensor([0.4958]), (tensor([0.4326]), 0)),
  (tensor([0.4952]), (tensor([0.4236]), 0)),
  (tensor([0.4896]), (tensor([0.4296]), 0)),
  (tensor

In [4]:
# Train surrogate models on the normalized dataset
latency, energy, peak_power = latency/max_latency, energy/max_energy, peak_power/max_peak_power
train_surrogate_models(surrogate_models, (X-mean_X)/max_X, latency, energy, peak_power)

# We see a non-zero epistemic uncertainty
get_predictions(surrogate_models, (X-mean_X)/max_X)

([(tensor([0.4834]), (tensor([0.0625]), 0)),
  (tensor([0.4482]), (tensor([0.0661]), 0)),
  (tensor([0.5132]), (tensor([0.0630]), 0)),
  (tensor([0.5275]), (tensor([0.0663]), 0)),
  (tensor([0.2622]), (tensor([0.0646]), 0)),
  (tensor([0.8217]), (tensor([0.0621]), 0)),
  (tensor([0.5896]), (tensor([0.0653]), 0)),
  (tensor([0.7882]), (tensor([0.0620]), 0)),
  (tensor([0.5783]), (tensor([0.0616]), 0)),
  (tensor([0.5036]), (tensor([0.0645]), 0)),
  (tensor([0.6091]), (tensor([0.0650]), 0)),
  (tensor([0.2616]), (tensor([0.0651]), 0)),
  (tensor([0.2589]), (tensor([0.0644]), 0)),
  (tensor([0.3191]), (tensor([0.0645]), 0)),
  (tensor([0.2489]), (tensor([0.0648]), 0)),
  (tensor([0.2565]), (tensor([0.0650]), 0)),
  (tensor([0.2837]), (tensor([0.0652]), 0))],
 [(tensor([0.4703]), (tensor([0.0603]), 0)),
  (tensor([0.4575]), (tensor([0.0575]), 0)),
  (tensor([0.4970]), (tensor([0.0597]), 0)),
  (tensor([0.5043]), (tensor([0.0565]), 0)),
  (tensor([0.2690]), (tensor([0.0599]), 0)),
  (tensor

In [5]:
latency, energy, peak_power

(array([0.50794941, 0.46322158, 0.5299725 , 0.55653288, 0.28797305,
        0.83333333, 0.6225946 , 0.82762999, 0.5978553 , 0.50909825,
        0.63592478, 0.28402982, 0.28320672, 0.34924548, 0.2638582 ,
        0.27996118, 0.3165051 ]),
 array([0.46015746, 0.47726582, 0.51435174, 0.52287294, 0.2386085 ,
        0.83333333, 0.59214409, 0.81465537, 0.58527834, 0.50316834,
        0.62450576, 0.20652767, 0.24696664, 0.3017484 , 0.23711017,
        0.28845448, 0.22602613]),
 array([0.63790534, 0.67373874, 0.58835356, 0.4624932 , 0.3267284 ,
        0.61936777, 0.74753424, 0.83333333, 0.4472374 , 0.55160363,
        0.63335226, 0.39762648, 0.43721469, 0.56476028, 0.60987724,
        0.37214102, 0.3810698 ]))

In [36]:
# Get random samples of the entire dataset to obtain maximum uncertainty
random_samples = embedding_util.get_samples(design_space, num_samples=16, sampling_method='Random', debug=False)
X_ds = convert_to_tabular(random_samples, only_embeddings=True)

get_predictions(surrogate_models, (X_ds-mean_X)/max_X)

0.06571674346923828

In [38]:
# Test GP
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

kernel = DotProduct() + WhiteKernel()
gpr = GaussianProcessRegressor(random_state=0)
gpr.fit(X, latency)

print(gpr.predict(X, return_std=True))
gpr.predict(X_ds, return_std=True)

(array([0.50794941, 0.46322158, 0.5299725 , 0.55653288, 0.28797305,
       0.83333333, 0.6225946 , 0.82762999, 0.5978553 , 0.50909825,
       0.63592478, 0.28402982, 0.28320672, 0.34924548, 0.2638582 ,
       0.27996118, 0.3165051 ]), array([1.00000004e-05, 1.00000004e-05, 1.00000004e-05, 1.00000004e-05,
       1.00000004e-05, 1.00000004e-05, 1.00000004e-05, 1.00000004e-05,
       1.00000004e-05, 1.00000004e-05, 1.00000004e-05, 1.00000004e-05,
       1.00000004e-05, 1.00000004e-05, 1.00000004e-05, 1.00000004e-05,
       1.00000004e-05]))


(array([0.        , 0.        , 0.        , 0.6225946 , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.83333333,
        0.28797305, 0.        , 0.        , 0.        , 0.        ,
        0.        ]),
 array([1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000004e-05,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000004e-05, 1.00000004e-05, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00]))

In [32]:
str(type(surrogate_models[0]))

"<class 'boshnas.BOSHNAS'>"