In [13]:
import os

import torch
import json
import torch.nn as nn
from torch.utils.data import DataLoader
from collections import defaultdict
from matplotlib import pyplot
import numpy as np
import pandas as pd

from gcd_data_manipulation import ClusterDataset
from gcd_data_manipulation import prepare_data
from shared_workspace_module import SharedWorkspaceModule

In [77]:
job_id = 6318371744
device = 'cuda' if torch.cuda.is_available() else 'cpu'

def rmse(yhat, y):
    print(type(yhat), type(y))
    return np.sqrt(np.mean((y - yhat)**2))


def rmspe(yhat, y):
    # EPSILON = 1e-10
    # print(((y - yhat) / (y + EPSILON))**2)
    return rmse(yhat, y) / np.mean(y)

def rmsse_2(yhat, y):
    e = y - yhat
    m = 1 / (len(y) - 1)
    t = np.sum(abs(np.delete((y - np.roll(y, 1)), 0)))

    print((m * t)**2)

    # return np.sqrt((e / (m * t))**2)
    return np.sqrt(np.mean((e / (m * t))**2))

def lag_rmse(y):
    yhat = np.delete(np.roll(y, 1), 0)
    y = np.delete(y, 0)
    # print(y)
    # print(yhat)
    return rmse(yhat, y)

def rmsse(yhat, y):
    e_2 = (y - yhat)**2
    m = 1 / (len(y) - 1)
    t = np.sum(abs(np.delete((y - np.roll(y, 1)), 0)))
    return np.sqrt(np.mean(e_2 / (m * t)))


In [16]:
def test_model(exp):
    #print(os.getcwd())
    #pass
    checkpoint = torch.load(f'../models/gwt_models/gwt_model_{exp}.pth')

    args = checkpoint['model_args']
    print(f'last epoch: {checkpoint["epoch"]}')
    print(f'last loss: {checkpoint["loss"]}')

    with open('columns_selection.json') as f:
        columns_selection = json.load(f)

    columns_to_consider = columns_selection[args.columns_to_consider]
    num_targets = len(args.prediction_targets)

    preprocessed_data = prepare_data(f'../data/task-usage_job-ID-{job_id}_total.csv', columns_to_consider, targets=args.prediction_targets, sliding_window=args.sliding_window, aggr_type='mean')

    test_data = ClusterDataset(preprocessed_data, num_targets=num_targets, training=True, split_percentage=1)
    test_data.values.to(device)

    model = SharedWorkspaceModule(
        h_dim=args.h_dim,
        ffn_dim=args.ffn_dim,
        num_layers=args.num_layers,
        num_heads=args.num_heads,
        dropout=args.dropout,
        shared_memory_attention=args.shared_memory_attention,
        share_vanilla_parameters=args.share_vanilla_parameters,
        use_topk=args.use_topk,
        topk=args.topk,
        mem_slots=args.mem_slots,
        num_targets=num_targets
    ).cuda()

    model.load_state_dict(checkpoint['state_dict'])

    model.eval()

    test_loader = DataLoader(test_data, batch_size=args.batch_size)
    criterion = nn.L1Loss(reduction='sum').cuda()
    result = defaultdict(list)

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            if num_targets == 1:
                targets = targets.reshape((targets.shape[0], 1))

            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            test_loss = loss.item() / targets.shape[0]

            result['loss'].append(test_loss)

            for pred in outputs:
                result['predicted_value'].append(pred.to('cpu').numpy())

            for act in targets:
                result['actual_value'].append(act.to('cpu').numpy())

    return result

In [78]:
def eval_result(result):
    print(f'Loss: {result["loss"]}')
    # print(type(result["predicted_value"]), np.array(result["predicted_value"]))
    #print(f'RMSE: {rmse(np.array(result["predicted_value"]), np.array(result["actual_value"]))}')
    #print(f'RMSPE: {rmspe(np.array(result["predicted_value"]), np.array(result["actual_value"]))}')
    # print(np.array(result["actual_value"]))
    #print(f'RMSSE: {rmsse(np.array(result["predicted_value"]), np.array(result["actual_value"]))}')
    print(f'RMSSE 2: {rmsse_2(np.array(result["predicted_value"]), np.array(result["actual_value"]))}')
    print(f'LAG: {lag_rmse(np.array(result["actual_value"]))}')





In [80]:
res = test_model('ray_tune_result')
eval_result(res)

last epoch: 100
last loss: 0.027329015947962478
transformer embed_dim 64
functional? False
total heads 2
head dim 32
use topk?True
topk:4
MEM SLOTS:6
Null attention:False
USING SHARED MEMORY ATTENTION +++++++++
Using gate style unit
query:4160
key:4160
value:4160
attention_mlp:4160
layernorm1:128
layernorm2:128
input_projector:4160
input projector:64
input_gate_projector:27584
memory_gate_projector:49920
relational volatie!!!
Loss: [0.14742141857481839]
0.01091309796404014
RMSSE 2: 2.682858467102051
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
LAG: 0.23879042267799377




# tr_sw_baseline model:
RMSE: 0.293494313955307
RMSPE: 0.9888595342636108
RMSSE: 0.9080561399459839

# ray_results
RMSE: 0.2802668809890747
RMSPE: 0.9442927837371826
RMSSE: 2.682858467102051

# tr_sw_t0_lb24
RMSE: 0.4566091001033783
RMSPE: 1.5384360551834106
RMSSE: 1.4127246141433716
