In [1]:
import numpy as np
import torch
import torch.nn
import torch.optim
from abc import abstractmethod
from collections import defaultdict
from functools import lru_cache
from itertools import count
from typing import List, Dict
from typing import Tuple, Any
from sklearn import ensemble
from sklearn.metrics import mean_squared_error
from torch.nn import MSELoss, ReLU, L1Loss
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [2]:
from importlib import reload
from config import Config
import config
from data import MDataset, Graph, load_graphs, save_dataset_pkl, load_dataset_pkl, save_scalers_pkl, load_scalers_pkl
import data
from base_module import MModule, nested_detach
import base_module
from executor import single_train_loop, grid_search_loop
import executor
from objects import ModelType
import objects
from metric import MetricUtil
import metric
from logger import init_logging
import logger
reload(config)
reload(data)
reload(base_module)
reload(executor)
reload(objects)
reload(metric)
reload(logger)
from config import Config
from data import MDataset, Graph, load_graphs
from base_module import MModule
from objects import ModelType
from metric import MetricUtil
from logger import init_logging
init_logging()

datasets_path: /root/guohao/repos/DLT-perf-model/datasets
configs_path: /root/guohao/repos/DLT-perf-model/notebooks/configs
datasets_path: /root/guohao/repos/DLT-perf-model/datasets
configs_path: /root/guohao/repos/DLT-perf-model/notebooks/configs


In [3]:
dataset_environment_str = "RTX2080Ti_CPUALL"
normalizer_cls = StandardScaler # MinMaxScaler
dummy = False

In [21]:
train_configs = {
    ModelType.MLP.name: Config.from_dict({
        "model": "MLP",
        "all_seed": 42,
        "dataset_environment_str": dataset_environment_str,
        "dataset_normalization": "Standard",
        "dataset_params": {
            "duration_summed": False,
        },
        "dataset_dummy": False,
        "batch_size": 32,
        "eval_steps": 10000,
        "learning_rate": 1e-4,
        "epochs": 10,
        "optimizer": "Adam",
        "meta_configs": {
            "learning_rate": 0.005,
            "meta_learning_rate": 0.001,
            "meta_train_steps": 1000,
            "meta_task_per_step": 8,
            "meta_fast_adaption_step": 5,
            "meta_dataset_train_environment_strs": [dataset_environment_str],
            "meta_dataset_eval_environment_strs": [dataset_environment_str],
        },
    }),
    ModelType.PerfNet.name: Config.from_dict({
        "model": "PerfNet",
        "dataset_environment_str": dataset_environment_str,
        "dataset_normalization": "Standard",
        "all_seed": 42,
        "dataset_params": {
            "duration_summed": False,
        },
        "dataset_dummy": True,
        "batch_size": 32,
        "eval_steps": 10000,
        "learning_rate": 1e-4,
        "epochs": 10,
        "optimizer": "Adam",
        "meta_configs": {
            "learning_rate": 0.005,
            "meta_learning_rate": 0.001,
            "meta_train_steps": 1000,
            "meta_task_per_step": 8,
            "meta_fast_adaption_step": 5,
            "meta_dataset_train_environment_strs": [dataset_environment_str],
            "meta_dataset_eval_environment_strs": [dataset_environment_str],
        },
    })
}

model_type = ModelType.MLP
conf = train_configs[model_type.name]

In [5]:
eval_graphs = load_graphs(dataset_environment_str,
                            train_or_eval="train",
                            use_dummy=dummy,
                            max_row=400_000)
train_graphs = load_graphs(dataset_environment_str,
                            train_or_eval="train",
                            use_dummy=dummy,
                            max_row=200_0000)

[2023-12-20 09:16:42,629] {data.py:441} INFO - Loading graphs train
[2023-12-20 09:16:42,630] {data.py:412} INFO - Loading merged.csv
[2023-12-20 09:16:42,630] {data.py:412} INFO - Loading merged.csv
[2023-12-20 09:16:43,623] {data.py:415} INFO - Loaded merged.csv, 400000 rows
[2023-12-20 09:16:44,271] {data.py:421} INFO - Loaded rand_5000.207_7.csv, 1332 rows
[2023-12-20 09:16:44,896] {data.py:421} INFO - Loaded rand_1000.146_7.csv, 1366 rows
[2023-12-20 09:16:46,070] {data.py:421} INFO - Loaded rand_5500.22_7.csv, 2609 rows
[2023-12-20 09:16:46,959] {data.py:421} INFO - Loaded rand_4500.22_7.csv, 1996 rows
[2023-12-20 09:16:47,055] {data.py:421} INFO - Loaded vgg16.85_7.csv, 173 rows
[2023-12-20 09:16:47,211] {data.py:421} INFO - Loaded resnet18.138_7.csv, 315 rows
[2023-12-20 09:16:47,532] {data.py:421} INFO - Loaded rand_2000.251_7.csv, 683 rows
[2023-12-20 09:16:48,127] {data.py:421} INFO - Loaded rand_6500.11_7.csv, 1302 rows
[2023-12-20 09:16:49,433] {data.py:421} INFO - Loaded 

In [6]:
len(eval_graphs), len(train_graphs)

(295, 1501)

In [7]:

def init_dataset(graphs: List[Graph]) -> MDataset:
    op_X, op_Y = list(), list()
    data_idx_to_graph = dict()
    counter = iter(count())
    op_feature_len = 0

    def node_features(g: Graph) -> Tuple[
        List[Dict], List[Dict]]:
        X, Y = list(), list()
        for i, node in enumerate(g.nodes):
            x_op_feature = node.op.to_feature_array("complex")
            x = {
                "x_op_feature": x_op_feature
            }
            node_durations = (node.duration,node.gap)
            x["x_id"] = i
            x["x_graph_id"] = g.ID
            y = {"y_node_durations": node_durations, "y_id": i, "y_graph_id": g.ID}
            X.append(x)
            Y.append(y)
        return X, Y

    for graph in graphs:
        X, Y = node_features(graph)
        for x in X:
            # if len(x['x_op_feature'])!=37:
            #     print(x['x_graph_id'], len(x['x_op_feature']))
            op_feature_len = max(op_feature_len, len(x["x_op_feature"]))
        op_X.extend(X)
        op_Y.extend(Y)
        for i in range(len(X)):
            data_idx_to_graph[next(counter)] = graph
    for x in op_X:
        v = x["x_op_feature"]
        x["x_op_feature"] = np.pad(v, (0, op_feature_len - v.size))

    dataset = MDataset(op_X, op_Y)
    return dataset

train_ds = init_dataset(train_graphs)
eval_ds = init_dataset(eval_graphs)

In [8]:
def get_scalers(ds):
    scaler_cls = normalizer_cls
    op_feature_array = list()
    y_array = list()

    for data in ds:
        feature, label = data
        op_feature_array.append(feature["x_op_feature"])
        y_array.append(label["y_node_durations"])

    op_feature_array = np.array(op_feature_array)
    y_array = np.array(y_array)

    op_feature_scaler = scaler_cls()
    op_feature_scaler.fit(op_feature_array)

    y_scaler = scaler_cls()
    y_scaler.fit(y_array)
    return op_feature_scaler, y_scaler

scalers = get_scalers(train_ds)
op_feature_scaler, y_scaler = scalers

In [9]:

def preprocess_dataset(ds: MDataset) -> MDataset:
    op_feature_array = list()
    y_array = list()

    for data in ds:
        feature, label = data
        op_feature_array.append(feature["x_op_feature"])
        y_array.append(label["y_node_durations"])

    op_feature_array = np.array(op_feature_array, dtype=np.float32)
    y_array = np.array(y_array, dtype=np.float32)


    op_feature_array = op_feature_scaler.transform(op_feature_array)
    y_array = y_scaler.transform(y_array)

    processed_features = list()
    processed_labels = list()
    for i, data in enumerate(ds):
        feature, label = data
        processed_features.append({
            "x_id": feature["x_id"],
            "x_graph_id": feature["x_graph_id"],
            # 运行时再传到cuda那边
            # "x_op_feature": torch.Tensor(op_feature_array[i]).to(device=self.conf.device)
            "x_op_feature": torch.Tensor(op_feature_array[i])
        })
        processed_labels.append({
            "y_id": label["y_id"],
            "y_graph_id": label["y_graph_id"],
            # "y_node_durations": torch.Tensor(y_array[i]).to(device=self.conf.device)
            "y_node_durations": torch.Tensor(y_array[i])
        })

    ds = MDataset(processed_features, processed_labels)
    return ds

preprocessed_train_ds = preprocess_dataset(train_ds)
preprocessed_eval_ds = preprocess_dataset(eval_ds)

In [10]:
# save_dataset_pkl(preprocessed_train_ds, conf.dataset_environment, "OpBased", 'train',
#                          conf.dataset_normalization)
# save_dataset_pkl(preprocessed_eval_ds, conf.dataset_environment, "OpBased", 'eval',
#                          conf.dataset_normalization)
# save_scalers_pkl(scalers, conf.dataset_environment, "OpBased", 'train',
#                          conf.dataset_normalization)

In [11]:
# preprocessed_train_ds = load_dataset_pkl(conf.dataset_environment, "OpBased", 'train', 
#                                          conf.dataset_normalization)
# preprocessed_eval_ds = load_dataset_pkl(conf.dataset_environment, "OpBased", 'eval',
#                                         conf.dataset_normalization)
# scalers = load_scalers_pkl(conf.dataset_environment, "OpBased", 'train',
#                            conf.dataset_normalization)


In [12]:
op_feature_scaler, y_scaler = scalers

In [13]:
def compute_evaluate_metrics(input_batches, output_batches, eval_loss) -> Dict[str, float]:
    batches_len = len(input_batches)

    def compute_op_durations(_logits):
        transformed: np.ndarray = y_scaler.inverse_transform(_logits)
        durations = transformed.sum(axis=1)
        return durations

    graph_id_to_duration_pred = defaultdict(int)
    for idx in range(batches_len):
        inputs = input_batches[idx]
        logits = output_batches[idx]
        logits = nested_detach(logits)
        logits = logits.cpu().numpy()
        graph_ids = inputs["x_graph_id"]
        op_durations = compute_op_durations(logits)
        for i, graph_id in enumerate(graph_ids):
            op_duration = op_durations[i].item()
            graph_id_to_duration_pred[graph_id] += op_duration
    duration_metrics = MetricUtil.compute_duration_metrics(eval_graphs, graph_id_to_duration_pred)
    return {"eval_loss": eval_loss, **duration_metrics}


In [14]:

def to_device(conf, features, labels):
    features['x_op_feature'] = features["x_op_feature"].to(device=conf.device)
    labels['y_node_durations'] = labels['y_node_durations'].to(device=conf.device)
    return features, labels

In [15]:
class MLPModel(MModule):

    @staticmethod
    def dimension_len(t):
        return t[-1] - t[0]
    
    @staticmethod
    def grid_search_model_params() -> Dict[str, List[Any]]:
        return {
            "learning_rate": [1e-3, 1e-4, 1e-5],
            'batch_size': [16, 32, 64],
            'epochs': [5, 10, 20, 30],
            'optimizer': ['Adam', 'SGD'],
        }
        # return {}
    
    def __init__(self, input_dimension, output_dimension, **kwargs):
        super().__init__(**kwargs)
        self.input = torch.nn.Linear(input_dimension, 256)
        self.relu1 = ReLU()
        self.dense1 = torch.nn.Linear(256, 128)
        self.relu2 = ReLU()
        self.dense2 = torch.nn.Linear(128, 64)
        self.relu3 = ReLU()
        self.output = torch.nn.Linear(64, output_dimension)
        self.loss_fn = L1Loss()

    def forward(self, X):
        X = X["x_op_feature"]
        X = self.input(X)
        X = self.relu1(X)
        X = self.dense1(X)
        X = self.relu2(X)
        X = self.dense2(X)
        X = self.relu3(X)
        Y = self.output(X)
        return Y

    def compute_loss(self, outputs, Y):
        node_durations = Y["y_node_durations"]
        loss = self.loss_fn(outputs, node_durations)
        return loss


def init_MLP_model() -> MModule | Any:
    sample_preprocessed_ds = preprocessed_train_ds
    sample_x_dict = sample_preprocessed_ds.features[0]
    sample_y_dict = sample_preprocessed_ds.labels[0]
    return MLPModel(input_dimension=len(sample_x_dict["x_op_feature"]),
                    output_dimension=len(sample_y_dict["y_node_durations"]))

In [16]:

class PerfNetModel(MModule):
    @staticmethod
    def dimension_len(t):
        return t[-1] - t[0]
    
    @staticmethod
    def grid_search_model_params() -> Dict[str, List[Any]]:
        return {
            "learning_rate": [1e-3, 1e-4, 1e-5],
            'batch_size': [16, 32, 64],
            'epochs': [5, 10, 20, 30],
            'optimizer': ['Adam', 'SGD'],
        }
        # return {}

    def __init__(self, output_dimension, **kwargs):
        super().__init__(**kwargs)
        self.conv1 = torch.nn.LazyConv1d(out_channels=32, kernel_size=3, bias=True, padding_mode='zeros')
        self.conv2 = torch.nn.LazyConv1d(out_channels=128, kernel_size=2, bias=True, padding_mode='zeros')
        self.flatten = torch.nn.Flatten()
        self.dense1 = torch.nn.LazyLinear(32)
        self.relu1 = ReLU()
        self.dense2 = torch.nn.LazyLinear(64)
        self.relu2 = ReLU()
        self.dense3 = torch.nn.LazyLinear(128)
        self.relu3 = ReLU()
        self.dense4 = torch.nn.LazyLinear(256)
        self.relu4 = ReLU()
        self.dropout = torch.nn.Dropout(p=0.3)
        self.output = torch.nn.LazyLinear(output_dimension)
        self.loss_fn = L1Loss()

    def forward(self, X):
        X = X["x_op_feature"]
        X = torch.unsqueeze(X, dim=1)
        X = self.conv1(X)
        X = self.conv2(X)
        X = self.flatten(X)
        X = self.dense1(X)
        X = self.relu1(X)
        X = self.dense2(X)
        X = self.relu2(X)
        X = self.dense3(X)
        X = self.relu3(X)
        X = self.dense4(X)
        X = self.relu4(X)
        X = self.dropout(X)
        Y = self.output(X)
        return Y

    def compute_loss(self, outputs, Y):
        node_durations = Y["y_node_durations"]
        loss = self.loss_fn(outputs, node_durations)
        return loss


def init_PerfNet_model() -> MModule | Any:
    sample_y_dict = preprocessed_train_ds.labels[0]
    return PerfNetModel(output_dimension=len(sample_y_dict["y_node_durations"]))



In [17]:
init_model_funcs = {
    ModelType.MLP.name: init_MLP_model,
    ModelType.PerfNet.name: init_PerfNet_model,
}


init_model = init_model_funcs[model_type.name]

model = init_model()
model = model.to(conf.device)




In [22]:
single_train_loop(model_type, conf, preprocessed_train_ds, preprocessed_eval_ds, model, compute_evaluate_metrics, to_device)

[2023-12-20 11:11:11,559] {executor.py:148} INFO - ModelType.MLP start single training.
[2023-12-20 11:11:11,560] {executor.py:150} INFO - ModelType.MLP training epoch 0


  0%|          | 0/62500 [00:00<?, ?it/s]

[2023-12-20 11:11:11,684] {executor.py:111} INFO - ModelType.MLP trained for 0.125396048 seconds.
[2023-12-20 11:11:11,685] {executor.py:112} INFO - ModelType.MLP eval at step 0.
[2023-12-20 11:11:24,019] {executor.py:116} INFO - ModelType.MLP train loss: 0.016828374937176704, eval metrics: {'eval_loss': 0.01839613865005318, 'MRE': 0.1559010296051249, 'MAE': 0.11380958377369707, 'RMSE': 104.96509482974797}
[2023-12-20 11:11:24,073] {executor.py:187} INFO - Saving model at step 0 with loss 0.016828374937176704,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 16%|█▌        | 9997/62500 [00:48<03:09, 277.38it/s]

[2023-12-20 11:12:00,213] {executor.py:111} INFO - ModelType.MLP trained for 48.654025729 seconds.
[2023-12-20 11:12:00,213] {executor.py:112} INFO - ModelType.MLP eval at step 10000.
[2023-12-20 11:12:12,457] {executor.py:116} INFO - ModelType.MLP train loss: 0.013243342749774456, eval metrics: {'eval_loss': 0.019142582029993645, 'MRE': 0.15285790712584243, 'MAE': 0.11910142708700426, 'RMSE': 108.45925683408034}
[2023-12-20 11:12:12,510] {executor.py:187} INFO - Saving model at step 10000 with loss 0.013243342749774456,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 32%|███▏      | 19984/62500 [01:37<02:32, 279.62it/s] 

[2023-12-20 11:12:49,193] {executor.py:111} INFO - ModelType.MLP trained for 97.6344785 seconds.
[2023-12-20 11:12:49,194] {executor.py:112} INFO - ModelType.MLP eval at step 20000.
[2023-12-20 11:13:01,503] {executor.py:116} INFO - ModelType.MLP train loss: 0.008859140798449516, eval metrics: {'eval_loss': 0.018111656830306164, 'MRE': 0.16108330630027592, 'MAE': 0.11362498468485534, 'RMSE': 80.83890692891974}
[2023-12-20 11:13:01,557] {executor.py:187} INFO - Saving model at step 20000 with loss 0.008859140798449516,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 48%|████▊     | 30000/62500 [02:26<01:52, 288.76it/s] 

[2023-12-20 11:13:38,381] {executor.py:111} INFO - ModelType.MLP trained for 146.822258318 seconds.
[2023-12-20 11:13:38,382] {executor.py:112} INFO - ModelType.MLP eval at step 30000.
[2023-12-20 11:13:50,706] {executor.py:116} INFO - ModelType.MLP train loss: 0.01575985923409462, eval metrics: {'eval_loss': 0.01926601150882896, 'MRE': 0.1667873339512671, 'MAE': 0.12523671049128807, 'RMSE': 127.22244396657514}
[2023-12-20 11:13:50,759] {executor.py:187} INFO - Saving model at step 30000 with loss 0.01575985923409462,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 64%|██████▍   | 39990/62500 [03:15<01:20, 278.28it/s] 

[2023-12-20 11:14:26,730] {executor.py:111} INFO - ModelType.MLP trained for 195.171721538 seconds.
[2023-12-20 11:14:26,731] {executor.py:112} INFO - ModelType.MLP eval at step 40000.


 64%|██████▍   | 39990/62500 [03:26<01:20, 278.28it/s]

[2023-12-20 11:14:38,816] {executor.py:116} INFO - ModelType.MLP train loss: 0.02496279589831829, eval metrics: {'eval_loss': 0.018242360975570044, 'MRE': 0.1452664097933803, 'MAE': 0.10465675712799272, 'RMSE': 88.25188694033537}
[2023-12-20 11:14:38,868] {executor.py:187} INFO - Saving model at step 40000 with loss 0.02496279589831829,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 80%|███████▉  | 49998/62500 [04:04<00:44, 278.44it/s]

[2023-12-20 11:15:15,750] {executor.py:111} INFO - ModelType.MLP trained for 244.191024013 seconds.
[2023-12-20 11:15:15,750] {executor.py:112} INFO - ModelType.MLP eval at step 50000.
[2023-12-20 11:15:27,886] {executor.py:116} INFO - ModelType.MLP train loss: 0.04399210959672928, eval metrics: {'eval_loss': 0.018740179296336135, 'MRE': 0.14997362929870287, 'MAE': 0.10984691110037406, 'RMSE': 89.80351948088226}
[2023-12-20 11:15:27,943] {executor.py:187} INFO - Saving model at step 50000 with loss 0.04399210959672928,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 96%|█████████▌| 59979/62500 [04:52<00:09, 275.02it/s]

[2023-12-20 11:16:04,400] {executor.py:111} INFO - ModelType.MLP trained for 292.841029848 seconds.
[2023-12-20 11:16:04,400] {executor.py:112} INFO - ModelType.MLP eval at step 60000.
[2023-12-20 11:16:16,575] {executor.py:116} INFO - ModelType.MLP train loss: 0.02036743238568306, eval metrics: {'eval_loss': 0.01831094786329195, 'MRE': 0.14411802380666883, 'MAE': 0.11121225414519058, 'RMSE': 113.04835590726833}
[2023-12-20 11:16:16,627] {executor.py:187} INFO - Saving model at step 60000 with loss 0.02036743238568306,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


100%|██████████| 62500/62500 [05:14<00:00, 198.76it/s]

[2023-12-20 11:16:26,013] {executor.py:150} INFO - ModelType.MLP training epoch 1



 12%|█▏        | 7474/62500 [00:27<03:14, 283.61it/s]

[2023-12-20 11:16:53,262] {executor.py:111} INFO - ModelType.MLP trained for 341.703159268 seconds.
[2023-12-20 11:16:53,263] {executor.py:112} INFO - ModelType.MLP eval at step 70000.
[2023-12-20 11:17:05,711] {executor.py:116} INFO - ModelType.MLP train loss: 0.016216333955526352, eval metrics: {'eval_loss': 0.018367468120534903, 'MRE': 0.15022673241961748, 'MAE': 0.11045214910917342, 'RMSE': 92.59136163617825}
[2023-12-20 11:17:05,768] {executor.py:187} INFO - Saving model at step 70000 with loss 0.016216333955526352,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 28%|██▊       | 17496/62500 [01:16<02:45, 272.00it/s]

[2023-12-20 11:17:42,444] {executor.py:111} INFO - ModelType.MLP trained for 390.885591789 seconds.
[2023-12-20 11:17:42,445] {executor.py:112} INFO - ModelType.MLP eval at step 80000.
[2023-12-20 11:17:54,872] {executor.py:116} INFO - ModelType.MLP train loss: 0.019403045997023582, eval metrics: {'eval_loss': 0.01869151526482776, 'MRE': 0.13991572711579042, 'MAE': 0.11294544675274339, 'RMSE': 113.49016712904327}
[2023-12-20 11:17:54,925] {executor.py:187} INFO - Saving model at step 80000 with loss 0.019403045997023582,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 44%|████▍     | 27474/62500 [02:04<02:10, 267.44it/s] 

[2023-12-20 11:18:30,806] {executor.py:111} INFO - ModelType.MLP trained for 439.246837881 seconds.
[2023-12-20 11:18:30,806] {executor.py:112} INFO - ModelType.MLP eval at step 90000.
[2023-12-20 11:18:42,880] {executor.py:116} INFO - ModelType.MLP train loss: 0.012413423508405685, eval metrics: {'eval_loss': 0.01800793450709898, 'MRE': 0.1415720920854932, 'MAE': 0.11152169016761088, 'RMSE': 107.56785189434157}
[2023-12-20 11:18:42,935] {executor.py:187} INFO - Saving model at step 90000 with loss 0.012413423508405685,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 60%|█████▉    | 37491/62500 [02:52<01:27, 285.75it/s] 

[2023-12-20 11:19:18,966] {executor.py:111} INFO - ModelType.MLP trained for 487.407062249 seconds.
[2023-12-20 11:19:18,966] {executor.py:112} INFO - ModelType.MLP eval at step 100000.
[2023-12-20 11:19:31,506] {executor.py:116} INFO - ModelType.MLP train loss: 0.023726895451545715, eval metrics: {'eval_loss': 0.018220880532837474, 'MRE': 0.1468499744891944, 'MAE': 0.11284808069580589, 'RMSE': 109.71328622063436}
[2023-12-20 11:19:31,559] {executor.py:187} INFO - Saving model at step 100000 with loss 0.023726895451545715,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 76%|███████▌  | 47481/62500 [03:41<00:54, 274.35it/s]

[2023-12-20 11:20:07,740] {executor.py:111} INFO - ModelType.MLP trained for 536.181378528 seconds.
[2023-12-20 11:20:07,741] {executor.py:112} INFO - ModelType.MLP eval at step 110000.


 76%|███████▌  | 47481/62500 [03:52<00:54, 274.35it/s]

[2023-12-20 11:20:20,012] {executor.py:116} INFO - ModelType.MLP train loss: 0.01947357878088951, eval metrics: {'eval_loss': 0.02007912580170203, 'MRE': 0.14425070184981517, 'MAE': 0.13189265347429624, 'RMSE': 151.10305978733956}
[2023-12-20 11:20:20,064] {executor.py:187} INFO - Saving model at step 110000 with loss 0.01947357878088951,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 92%|█████████▏| 57482/62500 [04:29<00:17, 284.45it/s]

[2023-12-20 11:20:55,900] {executor.py:111} INFO - ModelType.MLP trained for 584.341126197 seconds.
[2023-12-20 11:20:55,900] {executor.py:112} INFO - ModelType.MLP eval at step 120000.
[2023-12-20 11:21:08,043] {executor.py:116} INFO - ModelType.MLP train loss: 0.033977098762989044, eval metrics: {'eval_loss': 0.01867315520533826, 'MRE': 0.1761629362708115, 'MAE': 0.11974125291230672, 'RMSE': 103.90571705800839}
[2023-12-20 11:21:08,096] {executor.py:187} INFO - Saving model at step 120000 with loss 0.033977098762989044,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


100%|██████████| 62500/62500 [05:00<00:00, 208.21it/s]

[2023-12-20 11:21:26,194] {executor.py:150} INFO - ModelType.MLP training epoch 2



  8%|▊         | 4983/62500 [00:18<03:31, 271.99it/s]

[2023-12-20 11:21:44,337] {executor.py:111} INFO - ModelType.MLP trained for 632.778669413 seconds.
[2023-12-20 11:21:44,338] {executor.py:112} INFO - ModelType.MLP eval at step 130000.
[2023-12-20 11:21:56,295] {executor.py:116} INFO - ModelType.MLP train loss: 0.01423274353146553, eval metrics: {'eval_loss': 0.018435569548425266, 'MRE': 0.15541034095481002, 'MAE': 0.12077066459000929, 'RMSE': 108.85390194128188}
[2023-12-20 11:21:56,347] {executor.py:187} INFO - Saving model at step 130000 with loss 0.01423274353146553,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 24%|██▍       | 14981/62500 [01:06<02:50, 278.12it/s]

[2023-12-20 11:22:32,549] {executor.py:111} INFO - ModelType.MLP trained for 680.990503535 seconds.
[2023-12-20 11:22:32,550] {executor.py:112} INFO - ModelType.MLP eval at step 140000.
[2023-12-20 11:22:44,879] {executor.py:116} INFO - ModelType.MLP train loss: 0.01479564793407917, eval metrics: {'eval_loss': 0.0197608077154262, 'MRE': 0.16425492845742595, 'MAE': 0.10487160955196027, 'RMSE': 72.3839828314217}
[2023-12-20 11:22:44,932] {executor.py:187} INFO - Saving model at step 140000 with loss 0.01479564793407917,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 40%|███▉      | 24997/62500 [01:55<02:18, 271.64it/s] 

[2023-12-20 11:23:21,501] {executor.py:111} INFO - ModelType.MLP trained for 729.942371639 seconds.
[2023-12-20 11:23:21,502] {executor.py:112} INFO - ModelType.MLP eval at step 150000.
[2023-12-20 11:23:33,740] {executor.py:116} INFO - ModelType.MLP train loss: 0.025625932961702347, eval metrics: {'eval_loss': 0.017935092257591895, 'MRE': 0.14369525176235584, 'MAE': 0.09496642543878071, 'RMSE': 66.41245698790847}
[2023-12-20 11:23:33,793] {executor.py:187} INFO - Saving model at step 150000 with loss 0.025625932961702347,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 56%|█████▌    | 34997/62500 [02:44<01:40, 272.95it/s] 

[2023-12-20 11:24:10,906] {executor.py:111} INFO - ModelType.MLP trained for 779.346930868 seconds.
[2023-12-20 11:24:10,906] {executor.py:112} INFO - ModelType.MLP eval at step 160000.
[2023-12-20 11:24:23,586] {executor.py:116} INFO - ModelType.MLP train loss: 0.018370017409324646, eval metrics: {'eval_loss': 0.018280607412955723, 'MRE': 0.13922938977549448, 'MAE': 0.10365205435190242, 'RMSE': 80.50186644772045}
[2023-12-20 11:24:23,642] {executor.py:187} INFO - Saving model at step 160000 with loss 0.018370017409324646,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 72%|███████▏  | 44995/62500 [03:34<01:03, 274.93it/s] 

[2023-12-20 11:25:00,355] {executor.py:111} INFO - ModelType.MLP trained for 828.796184255 seconds.
[2023-12-20 11:25:00,356] {executor.py:112} INFO - ModelType.MLP eval at step 170000.
[2023-12-20 11:25:12,558] {executor.py:116} INFO - ModelType.MLP train loss: 0.02021123096346855, eval metrics: {'eval_loss': 0.018119021489857695, 'MRE': 0.14338509658645932, 'MAE': 0.11644564914543207, 'RMSE': 108.50913883948546}
[2023-12-20 11:25:12,610] {executor.py:187} INFO - Saving model at step 170000 with loss 0.02021123096346855,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 88%|████████▊ | 54993/62500 [04:22<00:27, 276.21it/s]

[2023-12-20 11:25:48,866] {executor.py:111} INFO - ModelType.MLP trained for 877.307288051 seconds.
[2023-12-20 11:25:48,867] {executor.py:112} INFO - ModelType.MLP eval at step 180000.
[2023-12-20 11:26:01,218] {executor.py:116} INFO - ModelType.MLP train loss: 0.012308205477893353, eval metrics: {'eval_loss': 0.019459871961213647, 'MRE': 0.16557622116448764, 'MAE': 0.11703839534539884, 'RMSE': 97.7178875555515}
[2023-12-20 11:26:01,277] {executor.py:187} INFO - Saving model at step 180000 with loss 0.012308205477893353,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


100%|██████████| 62500/62500 [05:02<00:00, 206.44it/s]

[2023-12-20 11:26:28,955] {executor.py:150} INFO - ModelType.MLP training epoch 3



  4%|▍         | 2494/62500 [00:08<03:32, 281.91it/s]

[2023-12-20 11:26:37,920] {executor.py:111} INFO - ModelType.MLP trained for 926.361625759 seconds.
[2023-12-20 11:26:37,921] {executor.py:112} INFO - ModelType.MLP eval at step 190000.


  4%|▍         | 2494/62500 [00:19<03:32, 281.91it/s]

[2023-12-20 11:26:50,046] {executor.py:116} INFO - ModelType.MLP train loss: 0.018175901845097542, eval metrics: {'eval_loss': 0.01850770862678066, 'MRE': 0.14529855256086804, 'MAE': 0.1078524882289416, 'RMSE': 81.73849679474075}
[2023-12-20 11:26:50,099] {executor.py:187} INFO - Saving model at step 190000 with loss 0.018175901845097542,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 20%|█▉        | 12486/62500 [00:57<03:03, 272.77it/s]

[2023-12-20 11:27:26,521] {executor.py:111} INFO - ModelType.MLP trained for 974.96204674 seconds.
[2023-12-20 11:27:26,521] {executor.py:112} INFO - ModelType.MLP eval at step 200000.


 20%|█▉        | 12486/62500 [01:09<03:03, 272.77it/s]

[2023-12-20 11:27:38,764] {executor.py:116} INFO - ModelType.MLP train loss: 0.023725124076008797, eval metrics: {'eval_loss': 0.01768645366106648, 'MRE': 0.13966284172836982, 'MAE': 0.10342697966177462, 'RMSE': 86.39770293668523}
[2023-12-20 11:27:38,816] {executor.py:187} INFO - Saving model at step 200000 with loss 0.023725124076008797,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 36%|███▌      | 22484/62500 [01:45<02:22, 280.70it/s] 

[2023-12-20 11:28:14,618] {executor.py:111} INFO - ModelType.MLP trained for 1023.059136041 seconds.
[2023-12-20 11:28:14,619] {executor.py:112} INFO - ModelType.MLP eval at step 210000.
[2023-12-20 11:28:26,791] {executor.py:116} INFO - ModelType.MLP train loss: 0.03179485350847244, eval metrics: {'eval_loss': 0.01808055626992602, 'MRE': 0.15075719508443894, 'MAE': 0.11333899256369276, 'RMSE': 100.98181625126851}
[2023-12-20 11:28:26,846] {executor.py:187} INFO - Saving model at step 210000 with loss 0.03179485350847244,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 52%|█████▏    | 32489/62500 [02:33<01:47, 278.05it/s] 

[2023-12-20 11:29:02,736] {executor.py:111} INFO - ModelType.MLP trained for 1071.17741281 seconds.
[2023-12-20 11:29:02,737] {executor.py:112} INFO - ModelType.MLP eval at step 220000.
[2023-12-20 11:29:14,751] {executor.py:116} INFO - ModelType.MLP train loss: 0.01608746126294136, eval metrics: {'eval_loss': 0.018403819385212846, 'MRE': 0.15023935448054207, 'MAE': 0.1105163751591264, 'RMSE': 86.61835000814169}
[2023-12-20 11:29:14,804] {executor.py:187} INFO - Saving model at step 220000 with loss 0.01608746126294136,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 68%|██████▊   | 42479/62500 [03:21<01:11, 280.76it/s] 

[2023-12-20 11:29:50,978] {executor.py:111} INFO - ModelType.MLP trained for 1119.418912087 seconds.
[2023-12-20 11:29:50,978] {executor.py:112} INFO - ModelType.MLP eval at step 230000.
[2023-12-20 11:30:02,885] {executor.py:116} INFO - ModelType.MLP train loss: 0.013137214817106724, eval metrics: {'eval_loss': 0.017929195344662294, 'MRE': 0.14624997371480813, 'MAE': 0.11122851285867898, 'RMSE': 95.61802873307653}
[2023-12-20 11:30:02,936] {executor.py:187} INFO - Saving model at step 230000 with loss 0.013137214817106724,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 84%|████████▍ | 52484/62500 [04:09<00:36, 275.15it/s]

[2023-12-20 11:30:38,988] {executor.py:111} INFO - ModelType.MLP trained for 1167.429638351 seconds.
[2023-12-20 11:30:38,989] {executor.py:112} INFO - ModelType.MLP eval at step 240000.
[2023-12-20 11:30:51,339] {executor.py:116} INFO - ModelType.MLP train loss: 0.0190349742770195, eval metrics: {'eval_loss': 0.017906541869202628, 'MRE': 0.1612737994021516, 'MAE': 0.10786637252487997, 'RMSE': 94.94903535205535}
[2023-12-20 11:30:51,392] {executor.py:187} INFO - Saving model at step 240000 with loss 0.0190349742770195,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


100%|██████████| 62500/62500 [04:58<00:00, 209.23it/s]

[2023-12-20 11:31:27,679] {executor.py:150} INFO - ModelType.MLP training epoch 4



  0%|          | 0/62500 [00:00<?, ?it/s]

[2023-12-20 11:31:27,810] {executor.py:111} INFO - ModelType.MLP trained for 1216.250975295 seconds.
[2023-12-20 11:31:27,810] {executor.py:112} INFO - ModelType.MLP eval at step 250000.
[2023-12-20 11:31:40,174] {executor.py:116} INFO - ModelType.MLP train loss: 0.09881850332021713, eval metrics: {'eval_loss': 0.01727094475051854, 'MRE': 0.14594254659102188, 'MAE': 0.10029244331317289, 'RMSE': 74.90917256774276}
[2023-12-20 11:31:40,227] {executor.py:187} INFO - Saving model at step 250000 with loss 0.09881850332021713,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 16%|█▌        | 9993/62500 [00:49<03:12, 272.26it/s]

[2023-12-20 11:32:17,184] {executor.py:111} INFO - ModelType.MLP trained for 1265.625292301 seconds.
[2023-12-20 11:32:17,185] {executor.py:112} INFO - ModelType.MLP eval at step 260000.


 16%|█▌        | 9993/62500 [01:00<03:12, 272.26it/s]

[2023-12-20 11:32:29,734] {executor.py:116} INFO - ModelType.MLP train loss: 0.011900201439857483, eval metrics: {'eval_loss': 0.018563424382293598, 'MRE': 0.13736410035527685, 'MAE': 0.10618510351445568, 'RMSE': 74.32034774772758}
[2023-12-20 11:32:29,787] {executor.py:187} INFO - Saving model at step 260000 with loss 0.011900201439857483,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 32%|███▏      | 19997/62500 [01:38<02:33, 277.34it/s] 

[2023-12-20 11:33:06,208] {executor.py:111} INFO - ModelType.MLP trained for 1314.649126595 seconds.
[2023-12-20 11:33:06,209] {executor.py:112} INFO - ModelType.MLP eval at step 270000.


 32%|███▏      | 19997/62500 [01:50<02:33, 277.34it/s]

[2023-12-20 11:33:18,375] {executor.py:116} INFO - ModelType.MLP train loss: 0.022295072674751282, eval metrics: {'eval_loss': 0.01984053178024944, 'MRE': 0.1735247233918468, 'MAE': 0.12213228467581592, 'RMSE': 95.78117802165167}
[2023-12-20 11:33:18,428] {executor.py:187} INFO - Saving model at step 270000 with loss 0.022295072674751282,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 48%|████▊     | 29990/62500 [02:27<01:58, 274.53it/s] 

[2023-12-20 11:33:55,696] {executor.py:111} INFO - ModelType.MLP trained for 1364.137114019 seconds.
[2023-12-20 11:33:55,697] {executor.py:112} INFO - ModelType.MLP eval at step 280000.
[2023-12-20 11:34:08,063] {executor.py:116} INFO - ModelType.MLP train loss: 0.012233452871441841, eval metrics: {'eval_loss': 0.01860899735751096, 'MRE': 0.14474968993664042, 'MAE': 0.09705923271863628, 'RMSE': 71.1630766120044}
[2023-12-20 11:34:08,119] {executor.py:187} INFO - Saving model at step 280000 with loss 0.012233452871441841,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 64%|██████▍   | 39986/62500 [03:17<01:17, 290.94it/s] 

[2023-12-20 11:34:45,230] {executor.py:111} INFO - ModelType.MLP trained for 1413.671737904 seconds.
[2023-12-20 11:34:45,231] {executor.py:112} INFO - ModelType.MLP eval at step 290000.
[2023-12-20 11:34:57,437] {executor.py:116} INFO - ModelType.MLP train loss: 0.008876865729689598, eval metrics: {'eval_loss': 0.01787450385824777, 'MRE': 0.14195099857429866, 'MAE': 0.1121844632696466, 'RMSE': 106.8048280311317}
[2023-12-20 11:34:57,491] {executor.py:187} INFO - Saving model at step 290000 with loss 0.008876865729689598,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 80%|███████▉  | 49979/62500 [04:05<00:41, 302.45it/s]

[2023-12-20 11:35:33,431] {executor.py:111} INFO - ModelType.MLP trained for 1461.872520029 seconds.
[2023-12-20 11:35:33,432] {executor.py:112} INFO - ModelType.MLP eval at step 300000.
[2023-12-20 11:35:45,459] {executor.py:116} INFO - ModelType.MLP train loss: 0.04570930078625679, eval metrics: {'eval_loss': 0.018845860352744347, 'MRE': 0.15464435893036346, 'MAE': 0.11150886262439602, 'RMSE': 102.5277287732354}
[2023-12-20 11:35:45,513] {executor.py:187} INFO - Saving model at step 300000 with loss 0.04570930078625679,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 96%|█████████▌| 59980/62500 [04:54<00:09, 279.06it/s]

[2023-12-20 11:36:21,940] {executor.py:111} INFO - ModelType.MLP trained for 1510.381688562 seconds.
[2023-12-20 11:36:21,941] {executor.py:112} INFO - ModelType.MLP eval at step 310000.
[2023-12-20 11:36:34,262] {executor.py:116} INFO - ModelType.MLP train loss: 0.013305333442986012, eval metrics: {'eval_loss': 0.01791243683920242, 'MRE': 0.14300531110732834, 'MAE': 0.11177959268818816, 'RMSE': 117.28548107136335}
[2023-12-20 11:36:34,314] {executor.py:187} INFO - Saving model at step 310000 with loss 0.013305333442986012,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


100%|██████████| 62500/62500 [05:16<00:00, 197.66it/s]

[2023-12-20 11:36:43,876] {executor.py:150} INFO - ModelType.MLP training epoch 5



 12%|█▏        | 7475/62500 [00:27<03:22, 272.38it/s]

[2023-12-20 11:37:11,264] {executor.py:111} INFO - ModelType.MLP trained for 1559.705323892 seconds.
[2023-12-20 11:37:11,265] {executor.py:112} INFO - ModelType.MLP eval at step 320000.
[2023-12-20 11:37:23,356] {executor.py:116} INFO - ModelType.MLP train loss: 0.013156212866306305, eval metrics: {'eval_loss': 0.017905524520841428, 'MRE': 0.15371918511460836, 'MAE': 0.10061022609296245, 'RMSE': 80.8690098159696}
[2023-12-20 11:37:23,407] {executor.py:187} INFO - Saving model at step 320000 with loss 0.013156212866306305,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 28%|██▊       | 17482/62500 [01:15<02:37, 286.16it/s]

[2023-12-20 11:37:59,664] {executor.py:111} INFO - ModelType.MLP trained for 1608.1049343 seconds.
[2023-12-20 11:37:59,664] {executor.py:112} INFO - ModelType.MLP eval at step 330000.
[2023-12-20 11:38:11,673] {executor.py:116} INFO - ModelType.MLP train loss: 0.02478645369410515, eval metrics: {'eval_loss': 0.01726079684354365, 'MRE': 0.14122007072667342, 'MAE': 0.10441759422144574, 'RMSE': 95.60695853004297}
[2023-12-20 11:38:11,725] {executor.py:187} INFO - Saving model at step 330000 with loss 0.02478645369410515,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 44%|████▍     | 27484/62500 [02:05<02:06, 277.06it/s] 

[2023-12-20 11:38:48,947] {executor.py:111} INFO - ModelType.MLP trained for 1657.387971431 seconds.
[2023-12-20 11:38:48,947] {executor.py:112} INFO - ModelType.MLP eval at step 340000.
[2023-12-20 11:39:01,449] {executor.py:116} INFO - ModelType.MLP train loss: 0.018834387883543968, eval metrics: {'eval_loss': 0.018420447795982474, 'MRE': 0.14787467062043233, 'MAE': 0.10925151871188898, 'RMSE': 93.70247856833176}
[2023-12-20 11:39:01,504] {executor.py:187} INFO - Saving model at step 340000 with loss 0.018834387883543968,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 60%|█████▉    | 37487/62500 [02:54<01:30, 275.08it/s] 

[2023-12-20 11:39:38,275] {executor.py:111} INFO - ModelType.MLP trained for 1706.715810772 seconds.
[2023-12-20 11:39:38,275] {executor.py:112} INFO - ModelType.MLP eval at step 350000.


 60%|█████▉    | 37487/62500 [03:04<01:30, 275.08it/s]

[2023-12-20 11:39:50,759] {executor.py:116} INFO - ModelType.MLP train loss: 0.01259913295507431, eval metrics: {'eval_loss': 0.01806449387148488, 'MRE': 0.14010598898430218, 'MAE': 0.09787851597009165, 'RMSE': 92.65531407954006}
[2023-12-20 11:39:50,818] {executor.py:187} INFO - Saving model at step 350000 with loss 0.01259913295507431,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 76%|███████▌  | 47485/62500 [03:44<00:53, 281.94it/s] 

[2023-12-20 11:40:28,122] {executor.py:111} INFO - ModelType.MLP trained for 1756.563001066 seconds.
[2023-12-20 11:40:28,122] {executor.py:112} INFO - ModelType.MLP eval at step 360000.


 76%|███████▌  | 47485/62500 [03:54<00:53, 281.94it/s]

[2023-12-20 11:40:40,598] {executor.py:116} INFO - ModelType.MLP train loss: 0.030382707715034485, eval metrics: {'eval_loss': 0.02237078582784161, 'MRE': 0.15332988006262577, 'MAE': 0.14004864601043346, 'RMSE': 160.85343340862494}
[2023-12-20 11:40:40,657] {executor.py:187} INFO - Saving model at step 360000 with loss 0.030382707715034485,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 92%|█████████▏| 57479/62500 [04:33<00:18, 272.57it/s]

[2023-12-20 11:41:17,235] {executor.py:111} INFO - ModelType.MLP trained for 1805.675856154 seconds.
[2023-12-20 11:41:17,235] {executor.py:112} INFO - ModelType.MLP eval at step 370000.


 92%|█████████▏| 57479/62500 [04:44<00:18, 272.57it/s]

[2023-12-20 11:41:29,725] {executor.py:116} INFO - ModelType.MLP train loss: 0.023822596296668053, eval metrics: {'eval_loss': 0.018069912244030276, 'MRE': 0.14584859585019724, 'MAE': 0.11707281213943742, 'RMSE': 112.97604729699523}
[2023-12-20 11:41:29,779] {executor.py:187} INFO - Saving model at step 370000 with loss 0.023822596296668053,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


100%|██████████| 62500/62500 [05:04<00:00, 205.09it/s]

[2023-12-20 11:41:48,624] {executor.py:150} INFO - ModelType.MLP training epoch 6



  8%|▊         | 4975/62500 [00:18<03:19, 287.98it/s]

[2023-12-20 11:42:06,756] {executor.py:111} INFO - ModelType.MLP trained for 1855.197061951 seconds.
[2023-12-20 11:42:06,756] {executor.py:112} INFO - ModelType.MLP eval at step 380000.


  8%|▊         | 4975/62500 [00:29<03:19, 287.98it/s]

[2023-12-20 11:42:19,043] {executor.py:116} INFO - ModelType.MLP train loss: 0.007746235001832247, eval metrics: {'eval_loss': 0.01744074332348071, 'MRE': 0.14980829916786512, 'MAE': 0.10299970014216367, 'RMSE': 81.36348193888766}
[2023-12-20 11:42:19,096] {executor.py:187} INFO - Saving model at step 380000 with loss 0.007746235001832247,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 24%|██▍       | 14995/62500 [01:07<02:56, 268.62it/s]

[2023-12-20 11:42:55,858] {executor.py:111} INFO - ModelType.MLP trained for 1904.299776427 seconds.
[2023-12-20 11:42:55,859] {executor.py:112} INFO - ModelType.MLP eval at step 390000.
[2023-12-20 11:43:08,261] {executor.py:116} INFO - ModelType.MLP train loss: 0.01232222095131874, eval metrics: {'eval_loss': 0.01830274661250878, 'MRE': 0.14022398183076734, 'MAE': 0.1148551547963278, 'RMSE': 109.15651663990933}
[2023-12-20 11:43:08,313] {executor.py:187} INFO - Saving model at step 390000 with loss 0.01232222095131874,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 40%|███▉      | 24981/62500 [01:55<02:14, 279.29it/s] 

[2023-12-20 11:43:44,605] {executor.py:111} INFO - ModelType.MLP trained for 1953.046150199 seconds.
[2023-12-20 11:43:44,606] {executor.py:112} INFO - ModelType.MLP eval at step 400000.
[2023-12-20 11:43:57,075] {executor.py:116} INFO - ModelType.MLP train loss: 0.011293438263237476, eval metrics: {'eval_loss': 0.017091282390961422, 'MRE': 0.13806927470775732, 'MAE': 0.09980460926043647, 'RMSE': 78.40295838860607}
[2023-12-20 11:43:57,128] {executor.py:187} INFO - Saving model at step 400000 with loss 0.011293438263237476,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 56%|█████▌    | 34996/62500 [02:45<01:39, 277.55it/s] 

[2023-12-20 11:44:34,062] {executor.py:111} INFO - ModelType.MLP trained for 2002.503560152 seconds.
[2023-12-20 11:44:34,063] {executor.py:112} INFO - ModelType.MLP eval at step 410000.
[2023-12-20 11:44:46,109] {executor.py:116} INFO - ModelType.MLP train loss: 0.014725537970662117, eval metrics: {'eval_loss': 0.018877783469785935, 'MRE': 0.16066507188623583, 'MAE': 0.10348471603524309, 'RMSE': 86.05538670062569}
[2023-12-20 11:44:46,161] {executor.py:187} INFO - Saving model at step 410000 with loss 0.014725537970662117,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 72%|███████▏  | 44983/62500 [03:34<01:02, 280.22it/s] 

[2023-12-20 11:45:23,353] {executor.py:111} INFO - ModelType.MLP trained for 2051.794001724 seconds.
[2023-12-20 11:45:23,353] {executor.py:112} INFO - ModelType.MLP eval at step 420000.
[2023-12-20 11:45:35,615] {executor.py:116} INFO - ModelType.MLP train loss: 0.013445857912302017, eval metrics: {'eval_loss': 0.019884941211356782, 'MRE': 0.18378115119816973, 'MAE': 0.11108342724416137, 'RMSE': 87.08682682566287}
[2023-12-20 11:45:35,669] {executor.py:187} INFO - Saving model at step 420000 with loss 0.013445857912302017,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 88%|████████▊ | 54975/62500 [04:23<00:28, 266.13it/s]

[2023-12-20 11:46:12,101] {executor.py:111} INFO - ModelType.MLP trained for 2100.542431929 seconds.
[2023-12-20 11:46:12,102] {executor.py:112} INFO - ModelType.MLP eval at step 430000.
[2023-12-20 11:46:24,453] {executor.py:116} INFO - ModelType.MLP train loss: 0.014299608767032623, eval metrics: {'eval_loss': 0.017846525647444652, 'MRE': 0.15492592025574736, 'MAE': 0.1116031028856617, 'RMSE': 109.69152508480467}
[2023-12-20 11:46:24,508] {executor.py:187} INFO - Saving model at step 430000 with loss 0.014299608767032623,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


100%|██████████| 62500/62500 [05:04<00:00, 205.16it/s]

[2023-12-20 11:46:53,271] {executor.py:150} INFO - ModelType.MLP training epoch 7



  4%|▍         | 2474/62500 [00:08<03:37, 275.62it/s]

[2023-12-20 11:47:02,348] {executor.py:111} INFO - ModelType.MLP trained for 2150.789463196 seconds.
[2023-12-20 11:47:02,349] {executor.py:112} INFO - ModelType.MLP eval at step 440000.
[2023-12-20 11:47:14,638] {executor.py:116} INFO - ModelType.MLP train loss: 0.0252571702003479, eval metrics: {'eval_loss': 0.01734024309675209, 'MRE': 0.13035251202114168, 'MAE': 0.0961828310944977, 'RMSE': 80.02155301351344}
[2023-12-20 11:47:14,690] {executor.py:187} INFO - Saving model at step 440000 with loss 0.0252571702003479,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 20%|█▉        | 12485/62500 [00:58<03:06, 268.09it/s]

[2023-12-20 11:47:51,887] {executor.py:111} INFO - ModelType.MLP trained for 2200.328028152 seconds.
[2023-12-20 11:47:51,887] {executor.py:112} INFO - ModelType.MLP eval at step 450000.
[2023-12-20 11:48:03,964] {executor.py:116} INFO - ModelType.MLP train loss: 0.03653121739625931, eval metrics: {'eval_loss': 0.01711066064560786, 'MRE': 0.140876006867974, 'MAE': 0.096582951403554, 'RMSE': 81.02810668725435}
[2023-12-20 11:48:04,015] {executor.py:187} INFO - Saving model at step 450000 with loss 0.03653121739625931,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 36%|███▌      | 22494/62500 [01:47<02:26, 273.39it/s] 

[2023-12-20 11:48:40,930] {executor.py:111} INFO - ModelType.MLP trained for 2249.371411558 seconds.
[2023-12-20 11:48:40,931] {executor.py:112} INFO - ModelType.MLP eval at step 460000.
[2023-12-20 11:48:53,197] {executor.py:116} INFO - ModelType.MLP train loss: 0.02340446040034294, eval metrics: {'eval_loss': 0.017795948096653448, 'MRE': 0.15306044204773409, 'MAE': 0.10439502876031279, 'RMSE': 81.23388729717469}
[2023-12-20 11:48:53,249] {executor.py:187} INFO - Saving model at step 460000 with loss 0.02340446040034294,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 52%|█████▏    | 32495/62500 [02:37<01:51, 269.86it/s] 

[2023-12-20 11:49:30,426] {executor.py:111} INFO - ModelType.MLP trained for 2298.867714761 seconds.
[2023-12-20 11:49:30,427] {executor.py:112} INFO - ModelType.MLP eval at step 470000.
[2023-12-20 11:49:42,494] {executor.py:116} INFO - ModelType.MLP train loss: 0.022118858993053436, eval metrics: {'eval_loss': 0.01910977988937404, 'MRE': 0.15289047727861982, 'MAE': 0.10768020246392951, 'RMSE': 83.43972082818759}
[2023-12-20 11:49:42,546] {executor.py:187} INFO - Saving model at step 470000 with loss 0.022118858993053436,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 68%|██████▊   | 42497/62500 [03:25<01:10, 284.13it/s] 

[2023-12-20 11:50:19,277] {executor.py:111} INFO - ModelType.MLP trained for 2347.718684831 seconds.
[2023-12-20 11:50:19,278] {executor.py:112} INFO - ModelType.MLP eval at step 480000.
[2023-12-20 11:50:31,206] {executor.py:116} INFO - ModelType.MLP train loss: 0.01074175164103508, eval metrics: {'eval_loss': 0.01694277050758712, 'MRE': 0.138152107230635, 'MAE': 0.10125798507709348, 'RMSE': 74.88878722578994}
[2023-12-20 11:50:31,257] {executor.py:187} INFO - Saving model at step 480000 with loss 0.01074175164103508,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 84%|████████▍ | 52487/62500 [04:15<00:36, 273.54it/s]

[2023-12-20 11:51:08,624] {executor.py:111} INFO - ModelType.MLP trained for 2397.065079509 seconds.
[2023-12-20 11:51:08,625] {executor.py:112} INFO - ModelType.MLP eval at step 490000.
[2023-12-20 11:51:20,749] {executor.py:116} INFO - ModelType.MLP train loss: 0.012763307429850101, eval metrics: {'eval_loss': 0.017551989438370802, 'MRE': 0.13825136079335948, 'MAE': 0.10278707953265952, 'RMSE': 79.15120289077805}
[2023-12-20 11:51:20,802] {executor.py:187} INFO - Saving model at step 490000 with loss 0.012763307429850101,save path: /root/guohao/repos/DLT-perf-model/notebooks/ckpts/RTX2080Ti_CPUALL/MLP/single_train2023-12-20_11-11-11


 93%|█████████▎| 58089/62500 [04:48<00:21, 201.42it/s]


KeyboardInterrupt: 

In [None]:
from executor import grid_search_loop
# reload(executor)
# grid_search_loop(
#     model_type,
#     conf,
#     preprocessed_train_ds,
#     preprocessed_eval_ds,
#     compute_evaluate_metrics,
#     to_device,
#     init_model,
# )