In [29]:
import pandas as pd

def parse_dataset_string(s):
    parts = s.split('/')
    if len(parts) == 2:
        dataset = parts[0]
        freq = None
        term = parts[1]
    elif len(parts) == 3:
        dataset, freq, term = parts
    else:
        raise ValueError(f"Unexpected dataset format: {s}")
    return pd.Series([dataset, freq, term])

In [30]:
def get_df(result_path):
    df = pd.read_csv(result_path)
    df = df[["dataset", "domain", "num_variates", "model", "train_step",
        "eval_metrics/MASE[0.5]", "eval_metrics/mean_weighted_sum_quantile_loss"]]
    df = df.rename(columns={"eval_metrics/MASE[0.5]": "MASE"})
    df = df.rename(columns={"eval_metrics/mean_weighted_sum_quantile_loss": "Qloss"})
    df = df.rename(columns={"train_step": "train step"})

    # Apply the parsing
    df[['dataset_name', 'freq', 'term']] = df['dataset'].apply(parse_dataset_string)

    # Manually fill missing frequencies
    freq_map = {
        'exchange_rate': 'D',
        'ercot': 'H',
        'dominick': 'W'
    }

    df['freq'] = df.apply(lambda row: freq_map.get(row['dataset_name'], row['freq']), axis=1)
    df['freq'] = df['freq'].map({
        "10S": "10 seconds",
        "5T": "5 minutes",
        "10T": "10 minute",
        "H": "1 hour",
        "D": "1 day",
        "W": "1 week",
    })

    df["size"] = df.apply(lambda row: row['model'].split("-")[-1], axis=1)

    df.drop(columns='dataset', inplace=True)

    df = df.rename(columns={"dataset_name": "dataset"})
    df = df[["dataset", "freq", "term", "domain", "num_variates", "model", "size", "train step",
        "MASE", "Qloss"]]
    
    return df

In [31]:
base_df = get_df(result_path = "results/test/chronos_bolt_base_test_results.csv")
small_df = get_df(result_path = "results/test/chronos_bolt_small_test_results.csv")
mini_df = get_df(result_path = "results/test/chronos_bolt_mini_test_results.csv")
tiny_df = get_df(result_path = "results/test/chronos_bolt_tiny_test_results.csv")

In [32]:
df = pd.concat([base_df, small_df, mini_df, tiny_df])

In [33]:
df

Unnamed: 0,dataset,freq,term,domain,num_variates,model,size,train step,MASE,Qloss
0,exchange_rate,1 day,short,Econ/Fin,1,chronos-bolt-base,base,500,24.251517,0.063937
1,exchange_rate,1 day,medium,Econ/Fin,1,chronos-bolt-base,base,500,20.395774,0.061711
2,exchange_rate,1 day,long,Econ/Fin,1,chronos-bolt-base,base,500,19.877615,0.059450
3,ercot,1 hour,short,Energy,1,chronos-bolt-base,base,500,2.493600,0.161155
4,ercot,1 hour,medium,Energy,1,chronos-bolt-base,base,500,2.703264,0.169326
...,...,...,...,...,...,...,...,...,...,...
900,solar,10 minute,medium,Energy,1,chronos-bolt-tiny,tiny,200000,2.830568,1.418010
901,solar,10 minute,long,Energy,1,chronos-bolt-tiny,tiny,200000,2.772037,1.369096
902,solar,1 hour,short,Energy,1,chronos-bolt-tiny,tiny,200000,3.004985,1.411132
903,solar,1 hour,medium,Energy,1,chronos-bolt-tiny,tiny,200000,3.053877,1.419348


In [46]:
df_long = pd.melt(
    df,
    id_vars=["dataset", "freq", "term", "domain", "num_variates", "model", "size", "train step"],
    value_vars=["MASE", "Qloss"],
    var_name="metric",
    value_name="value"
)


df_pivot = df_long.pivot_table(
    index=["dataset", "freq", "term", "domain", "num_variates", "size", "metric"],
    columns="train step",
    values="value"
).reset_index()

In [37]:
df_pivot.head()

train step,dataset,freq,term,domain,num_variates,size,metric,500,10000,20000,...,110000,120000,130000,140000,150000,160000,170000,180000,190000,200000
0,bitbrains_fast_storage,1 hour,short,Web/CloudOps,2,base,MASE,2.025949,2.197866,2.05782,...,2.074659,,,,,,,,,
1,bitbrains_fast_storage,1 hour,short,Web/CloudOps,2,base,Qloss,1.554958,1.537555,1.550422,...,1.49942,,,,,,,,,
2,bitbrains_fast_storage,1 hour,short,Web/CloudOps,2,mini,MASE,1.9015,2.051663,1.98201,...,1.92838,2.148383,2.316839,2.121807,2.119477,1.985224,1.943896,2.025754,1.974481,2.047939
3,bitbrains_fast_storage,1 hour,short,Web/CloudOps,2,mini,Qloss,1.489185,1.539738,1.462624,...,1.492582,1.564842,1.517571,1.529321,1.517911,1.51959,1.492736,1.471911,1.496896,1.493888
4,bitbrains_fast_storage,1 hour,short,Web/CloudOps,2,small,MASE,1.980415,1.952074,1.958588,...,2.009047,2.256154,2.145661,2.019833,1.99803,2.045786,2.004791,1.98544,2.264403,2.134664


In [51]:
train_step_list = [500] + [i*10_000 for i in range(1, 20+1)]

In [54]:
df_grouped_term = df_pivot.groupby(["term", "size", "metric"])[train_step_list].mean().reset_index()
#df_grouped_term

In [55]:
df_grouped_domain = df_pivot.groupby(["domain", "size", "metric"])[train_step_list].mean().reset_index()
#df_grouped_domain

In [56]:
df_grouped_freq = df_pivot.groupby(["freq", "size", "metric"])[train_step_list].mean().reset_index()
#df_grouped_freq

In [58]:
print(df_grouped_term.to_latex(index=False, float_format="%.3f"))

\begin{tabular}{lllrrrrrrrrrrrrrrrrrrrrr}
\toprule
term & size & metric & 500 & 10000 & 20000 & 30000 & 40000 & 50000 & 60000 & 70000 & 80000 & 90000 & 100000 & 110000 & 120000 & 130000 & 140000 & 150000 & 160000 & 170000 & 180000 & 190000 & 200000 \\
\midrule
long & base & MASE & 4.455 & 4.430 & 4.413 & 4.409 & 4.371 & 4.459 & 4.432 & 4.381 & 4.429 & 4.421 & 4.401 & 4.333 & 6.797 & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\
long & base & Qloss & 0.632 & 0.625 & 0.633 & 0.619 & 0.622 & 0.622 & 0.631 & 0.623 & 0.629 & 0.625 & 0.626 & 0.621 & 0.560 & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\
long & mini & MASE & 4.405 & 4.368 & 4.428 & 4.410 & 4.387 & 4.441 & 4.418 & 4.405 & 4.400 & 4.420 & 4.383 & 4.403 & 4.430 & 4.442 & 4.360 & 4.440 & 4.383 & 4.474 & 4.406 & 4.445 & 4.400 \\
long & mini & Qloss & 0.628 & 0.618 & 0.626 & 0.619 & 0.623 & 0.623 & 0.631 & 0.615 & 0.625 & 0.619 & 0.621 & 0.624 & 0.618 & 0.625 & 0.621 & 0.622 & 0.621 & 0.627 & 0.622 & 0.630 & 0.625 \\
long & sm

In [59]:
print(df_grouped_domain.to_latex(index=False, float_format="%.3f"))

\begin{tabular}{lllrrrrrrrrrrrrrrrrrrrrr}
\toprule
domain & size & metric & 500 & 10000 & 20000 & 30000 & 40000 & 50000 & 60000 & 70000 & 80000 & 90000 & 100000 & 110000 & 120000 & 130000 & 140000 & 150000 & 160000 & 170000 & 180000 & 190000 & 200000 \\
\midrule
Econ/Fin & base & MASE & 21.508 & 20.214 & 20.105 & 19.450 & 20.153 & 19.658 & 19.391 & 19.646 & 19.392 & 19.964 & 20.284 & 19.338 & 19.866 & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\
Econ/Fin & base & Qloss & 0.062 & 0.060 & 0.061 & 0.059 & 0.059 & 0.059 & 0.059 & 0.059 & 0.059 & 0.060 & 0.060 & 0.059 & 0.060 & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\
Econ/Fin & mini & MASE & 19.571 & 19.903 & 19.542 & 19.732 & 20.234 & 20.178 & 20.081 & 19.871 & 19.775 & 19.749 & 20.153 & 20.061 & 19.976 & 19.810 & 19.254 & 20.516 & 19.200 & 19.519 & 19.284 & 19.572 & 20.010 \\
Econ/Fin & mini & Qloss & 0.058 & 0.059 & 0.058 & 0.059 & 0.060 & 0.061 & 0.060 & 0.059 & 0.059 & 0.060 & 0.060 & 0.060 & 0.059 & 0.059 & 0.059 & 0.061 

In [60]:
print(df_grouped_freq.to_latex(index=False, float_format="%.3f"))

\begin{tabular}{lllrrrrrrrrrrrrrrrrrrrrr}
\toprule
freq & size & metric & 500 & 10000 & 20000 & 30000 & 40000 & 50000 & 60000 & 70000 & 80000 & 90000 & 100000 & 110000 & 120000 & 130000 & 140000 & 150000 & 160000 & 170000 & 180000 & 190000 & 200000 \\
\midrule
1 day & base & MASE & 13.563 & 12.813 & 12.761 & 12.326 & 12.772 & 12.462 & 12.313 & 12.457 & 12.317 & 12.662 & 12.862 & 12.271 & 19.866 & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\
1 day & base & Qloss & 0.167 & 0.168 & 0.170 & 0.163 & 0.167 & 0.165 & 0.166 & 0.167 & 0.167 & 0.167 & 0.167 & 0.165 & 0.060 & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\
1 day & mini & MASE & 12.411 & 12.617 & 12.400 & 12.553 & 12.829 & 12.805 & 12.755 & 12.622 & 12.550 & 12.558 & 12.824 & 12.708 & 12.671 & 12.564 & 12.261 & 12.989 & 12.237 & 12.417 & 12.262 & 12.453 & 12.699 \\
1 day & mini & Qloss & 0.164 & 0.169 & 0.165 & 0.167 & 0.168 & 0.169 & 0.165 & 0.169 & 0.168 & 0.167 & 0.176 & 0.171 & 0.171 & 0.168 & 0.170 & 0.166 & 0.171 & 0.16