### Functions

In [1]:
import pandas as pd
from IPython.display import display_html
from itertools import chain, cycle

def log_metrics_df(metrics_dir, epoch_level):
    """Load ``metrics.csv`` from a run directory and average it per epoch.

    Args:
        metrics_dir: Directory that contains ``metrics.csv``.
        epoch_level: When true, select columns whose name contains
            ``epoch_level_t_`` / ``epoch_level_v_``; otherwise select columns
            containing ``t_`` / ``v_`` that are not ``epoch_level`` columns.
            Train columns never include names containing ``test``.

    Returns:
        ``(train_metrics, val_metrics)``: two DataFrames indexed by ``epoch``
        holding the mean of every selected column over the rows in which all
        selected columns are present.
    """
    metrics = pd.read_csv(f"{metrics_dir}/metrics.csv")

    def is_train_column(name):
        # "test_..." contains "t_" as a substring, so it is excluded explicitly.
        if "test" in name:
            return False
        if epoch_level:
            return "epoch_level_t_" in name
        return "t_" in name and "epoch_level" not in name

    def is_val_column(name):
        if epoch_level:
            return "epoch_level_v_" in name
        return "v_" in name and "epoch_level" not in name

    def per_epoch_mean(keep):
        selected = [name for name in metrics.columns if keep(name)]
        selected.append("epoch")
        # Rows are logged sparsely, so drop the ones lacking any selected value.
        complete_rows = metrics[selected].dropna()
        return complete_rows.groupby(["epoch"]).mean()

    return per_epoch_mean(is_train_column), per_epoch_mean(is_val_column)

def display_side_by_side(*args, titles=cycle([''])):
    """Render several DataFrames next to each other as inline HTML tables.

    Args:
        *args: Objects exposing ``to_html()`` (e.g. pandas DataFrames).
        titles: Iterable of headings, consumed in step with ``args``. When it
            runs out before ``args`` does, the remaining headings are filled
            with ``'</br>'``. The default is an endless stream of empty titles.

    Returns:
        None. The assembled markup is passed to ``display_html(..., raw=True)``.
    """
    fragments = []
    for df, title in zip(args, chain(titles, cycle(['</br>']))):
        # Style only the opening <table ...> tag. Replacing every occurrence of
        # the word "table" would also rewrite the closing </table> tag and any
        # header or cell text that happens to contain "table".
        table_html = df.to_html().replace('<table', '<table style="display:inline"', 1)
        fragments.append('<th style="text-align:center"><td style="vertical-align:top">')
        fragments.append(f'<h2 style="text-align: center;">{title}</h2>')
        fragments.append(table_html)
        fragments.append('</td></th>')
    display_html(''.join(fragments), raw=True)

def show_training(data_name, model_name, epoch_level=False):
    """Display the last 20 epochs of train and validation metrics for a run.

    Args:
        data_name: Dataset directory name under ``logs/train``.
        model_name: Model directory name under ``logs/train/<data_name>``.
        epoch_level: Forwarded to ``log_metrics_df`` to choose which metric
            columns are read.

    Returns:
        None. Two captioned inline tables are emitted through ``display_html``.
    """
    run_dir = f"logs/train/{data_name}/{model_name}/version_0"
    train_metrics, val_metrics = log_metrics_df(run_dir, epoch_level)

    def captioned(frame, caption):
        # Keep only the 20 most recent epochs (the last 20 rows).
        recent = frame.iloc[-20:, :]
        inline = recent.style.set_table_attributes("style='display:inline'")
        return inline.set_caption(caption)

    stylers = (
        captioned(train_metrics, 'Train Metrics'),
        captioned(val_metrics, 'Val Metrics'),
    )
    for styler in stylers:
        display_html(styler._repr_html_(), raw=True)

def show_testing(data_name, model_name, focused_metric=None):
    """Display the test metrics table stored for a run.

    Args:
        data_name: Dataset directory name under ``logs/train``.
        model_name: Model directory name under ``logs/train/<data_name>``.
        focused_metric: Optional column to sort by, largest value first.
            When ``None`` the rows keep their order from the CSV.

    Returns:
        None. A captioned inline table is emitted through ``display_html``.
    """
    test_metrics = pd.read_csv(
        f"logs/train/{data_name}/{model_name}/version_0/test_metrics.csv"
    )
    if focused_metric is not None:
        test_metrics = test_metrics.sort_values(focused_metric, ascending=False)

    # Show the checkpoint name first, followed by the other columns in order.
    remaining = [column for column in test_metrics.columns if column != "model_name"]
    ordered = test_metrics[["model_name", *remaining]]

    styler = ordered.style.set_table_attributes("style='display:inline'")
    styler = styler.set_caption('Test Metrics')
    display_html(styler._repr_html_(), raw=True)

### Train

In [2]:
# Dataset run and model whose training/test logs are displayed by the next cells.
data_name, model_name = "raf_6", "stgm"

In [3]:
# Step-level (not epoch-level) train/validation metrics for the selected run.
show_training(data_name, model_name, epoch_level=False)

Unnamed: 0_level_0,t_loss,t_rmse,t_rmsse
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
7,251.219297,15.519693,4.825589
8,255.364613,15.647975,5.59863
9,274.348338,16.207761,5.085451
10,259.919496,15.862905,7.537243
11,285.092004,16.545638,6.025627
12,273.360981,16.176918,4.733098
13,269.367269,16.071056,7.165746
14,247.249221,15.466127,4.962742
15,246.684388,15.384406,6.215301
16,259.995012,15.832394,6.264172


Unnamed: 0_level_0,v_loss,v_rmse,v_rmsse
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
7,279.157562,16.70801,1.300557
8,279.439087,16.716433,1.659742
9,279.108337,16.706535,1.364701
10,278.900452,16.700287,1.203426
11,278.799561,16.69718,1.159825
12,278.984467,16.702692,1.161431
13,278.904846,16.700407,1.163015
14,278.750671,16.695732,1.160901
15,279.192841,16.709064,1.285774
16,279.039581,16.704477,1.203372


### Test

In [4]:
# Test metrics of the saved checkpoints for the selected run.
show_testing(data_name=data_name, model_name=model_name)

Unnamed: 0,model_name,test_loss,test_rmse,test_rmsse
0,epoch=11-v_rmsse=1.160.ckpt,278.799561,16.69718,1.159825
1,epoch=12-v_rmsse=1.161.ckpt,278.984467,16.702692,1.161431
2,epoch=14-v_rmsse=1.161.ckpt,278.750671,16.695732,1.160901
3,epoch=23-v_rmsse=1.160.ckpt,278.458221,16.687057,1.160202
4,epoch=25-v_rmsse=1.163.ckpt,279.107117,16.70639,1.162839


### Ablation Study

In [5]:
# Ablation baseline: the "icgnn" run on raf_6.
show_testing(data_name="raf_6", model_name="icgnn")

Unnamed: 0,model_name,test_loss,test_rmse,test_rmsse
0,epoch=11-v_rmsse=1.616.ckpt,280.197205,16.739092,1.616028
1,epoch=12-v_rmsse=1.615.ckpt,280.196777,16.739079,1.615319
2,epoch=13-v_rmsse=1.617.ckpt,280.197632,16.739105,1.616633
3,epoch=2-v_rmsse=1.615.ckpt,280.197205,16.739092,1.615466
4,epoch=3-v_rmsse=1.614.ckpt,280.196411,16.739069,1.614497


In [6]:
# Ablation: the "icgnn_no_stgm" run on raf_6.
show_testing(data_name="raf_6", model_name="icgnn_no_stgm")

Unnamed: 0,model_name,test_loss,test_rmsse
0,epoch=0-v_rmsse=1.270.ckpt,16.918581,1.269568
1,epoch=1-v_rmsse=1.263.ckpt,16.904516,1.262679
2,epoch=15-v_rmsse=1.290.ckpt,16.890251,1.290419
3,epoch=16-v_rmsse=1.302.ckpt,16.889,1.301707
4,epoch=5-v_rmsse=1.339.ckpt,16.898802,1.339119


In [7]:
# Ablation: the "icgnn_no_sigmoid" run on raf_6.
show_testing(data_name="raf_6", model_name="icgnn_no_sigmoid")

Unnamed: 0,model_name,test_loss,test_rmsse
0,epoch=0-v_rmsse=1.193.ckpt,16.923935,1.193248
1,epoch=1-v_rmsse=1.205.ckpt,16.930164,1.205483
2,epoch=2-v_rmsse=1.288.ckpt,16.916109,1.288322
3,epoch=3-v_rmsse=1.271.ckpt,16.915995,1.271281
4,epoch=8-v_rmsse=1.288.ckpt,16.893265,1.287837


In [8]:
# Ablation: the "icgnn_no_stgm_no_sigmoid" run on raf_6.
show_testing(data_name="raf_6", model_name="icgnn_no_stgm_no_sigmoid")

Unnamed: 0,model_name,test_loss,test_rmsse
0,epoch=17-v_rmsse=1.236.ckpt,16.895565,1.23592
1,epoch=2-v_rmsse=1.259.ckpt,16.9044,1.259124
2,epoch=25-v_rmsse=1.252.ckpt,16.887745,1.251972
3,epoch=26-v_rmsse=1.241.ckpt,16.894119,1.240713
4,epoch=4-v_rmsse=1.247.ckpt,16.909544,1.24742


### Summary per-TS Classification

In [9]:
import torch

import numpy as np

from loss import rmse_loss, rmsse_loss

# Loss callables used by get_stats to score predictions against ground truth.
# NOTE(review): both are constructed with None; what that argument means is
# defined in the project's `loss` module, which is not visible here — confirm.
rmse_loss_instance = rmse_loss(None)
rmsse_loss_instance = rmsse_loss(None)

In [28]:
# Cut-offs used by _classify_demand; 1.32 / 0.49 are the values commonly used
# for ADI / CV^2 in the Syntetos-Boylan demand categorisation.
_ADI_CUTOFF = 1.32
_CV2_CUTOFF = 0.49


def _classify_demand(series):
    """Return "smooth", "intermittent", "erratic" or "lumpy" for one 1-D series."""
    non_zero = series[series != 0]
    if len(non_zero) > 0:
        # Number of periods per non-zero observation.
        p = len(series) / len(non_zero)
        # Squared coefficient of variation of the non-zero values.
        cov = (np.std(non_zero) / np.mean(non_zero)) ** 2
    else:
        # A series with no non-zero value falls through to "lumpy".
        p = np.inf
        cov = np.inf

    if p < _ADI_CUTOFF and cov < _CV2_CUTOFF:
        return "smooth"
    if p >= _ADI_CUTOFF and cov < _CV2_CUTOFF:
        return "intermittent"
    if p < _ADI_CUTOFF and cov >= _CV2_CUTOFF:
        return "erratic"
    return "lumpy"


def get_stats(data, pred_period=6, lookback_period=6):
    """Score every forecasting model per demand class of the time series.

    Predictions are read from
    ``logs/train/{data}_{pred_period}/<model>/version_0/test.npy`` (leading
    axis of size 1 is squeezed away, negative values are raised to 0) and the
    series from ``../data/{data}/preprocessed/ts.npy`` (transposed after
    loading, so each column is handled as one series).

    Args:
        data: Dataset name used in both paths above.
        pred_period: Number of trailing rows used as ground truth.
        lookback_period: Number of rows directly before the ground-truth
            window that are handed to the RMSSE loss as history.

    Returns:
        DataFrame with columns ``class``, ``model``, ``rmse``, ``rmsse``; one
        row per (class, model) for every class that contains at least one
        series, ordered intermittent, smooth, lumpy, erratic. Rows with a NaN
        metric are dropped. A metric that is exactly 0 is kept.

    Raises:
        FileNotFoundError: If any of the ``.npy`` files is missing.
    """
    # Result key -> run directory name (only Croston's directory differs).
    model_dirs = {
        "icgnn": "icgnn",
        "croston": "crostonclassic",
        "sba": "sba",
        "tsb": "tsb",
        "adida": "adida",
        "imapa": "imapa",
    }

    predictions = {}
    for model_key, dir_name in model_dirs.items():
        raw = np.load(f"logs/train/{data}_{pred_period}/{dir_name}/version_0/test.npy").squeeze(0)
        # Forecasts below zero are raised to zero.
        predictions[model_key] = np.where(raw < 0, 0, raw)

    series = np.load(f"../data/{data}/preprocessed/ts.npy").T
    gt = series[-pred_period:]
    historical = series[-(lookback_period + pred_period):-pred_period]

    class_names = ["intermittent", "smooth", "lumpy", "erratic"]
    grouped = {
        class_name: {key: [] for key in (*model_dirs, "gt", "historical")}
        for class_name in class_names
    }

    for i in range(historical.shape[1]):
        # The class is derived from the complete series, which includes the
        # ground-truth window (unchanged from the previous implementation).
        bucket = grouped[_classify_demand(series[:, i])]
        for model_key, prediction in predictions.items():
            bucket[model_key].append(prediction[:, i].tolist())
        bucket["gt"].append(gt[:, i].tolist())
        bucket["historical"].append(historical[:, i].tolist())

    records = []
    for class_name in class_names:
        bucket = grouped[class_name]
        if not bucket["gt"]:
            # No series fell into this class, so there is nothing to score.
            # Skipping here replaces the former "0 means missing" sentinel,
            # which also discarded genuine zero-error results.
            continue
        # Each tensor is (1, n_series, n_steps).
        # NOTE(review): confirm this layout is what the loss callables expect.
        tensors = {
            key: torch.from_numpy(np.array(rows)).unsqueeze(0)
            for key, rows in bucket.items()
        }
        for model_key in model_dirs:
            records.append({
                "class": class_name,
                "model": model_key,
                "rmse": rmse_loss_instance(tensors[model_key], tensors["gt"]).item(),
                "rmsse": rmsse_loss_instance(
                    tensors[model_key], tensors["gt"], tensors["historical"]
                ).item(),
            })

    stats_df = pd.DataFrame(records, columns=["class", "model", "rmse", "rmsse"])
    return stats_df.dropna().reset_index(drop=True)

In [29]:
# Per-class error tables for each dataset, computed with the default periods.
auto_stats, raf_stats, carparts_stats = (
    get_stats(dataset) for dataset in ("auto", "raf", "carparts")
)

In [30]:
# carparts results ordered by demand class, then by model name.
carparts_stats.sort_values(by=["class", "model"])

Unnamed: 0,class,model,rmse,rmsse
22,erratic,adida,1.617359,0.520308
19,erratic,croston,1.953124,0.763591
18,erratic,icgnn,1.513074,0.454084
23,erratic,imapa,1.561776,0.484693
20,erratic,sba,1.904505,0.725764
21,erratic,tsb,1.61388,0.515001
4,intermittent,adida,0.64769,0.997786
1,intermittent,croston,1.71655,6.84397
0,intermittent,icgnn,0.556396,0.789731
5,intermittent,imapa,0.604595,0.899413
