In [54]:
import pandas as pd
import numpy as np

results_df = pd.read_csv("results/chronos_benchmark_results.csv")

In [55]:
results_df.head()

Unnamed: 0,dataset,model,eval_metrics/MSE[mean],eval_metrics/MSE[0.5],eval_metrics/MAE[0.5],eval_metrics/MASE[0.5],eval_metrics/MAPE[0.5],eval_metrics/sMAPE[0.5],eval_metrics/MSIS,eval_metrics/RMSE[mean],eval_metrics/NRMSE[mean],eval_metrics/ND[0.5],eval_metrics/mean_weighted_sum_quantile_loss,domain,num_variates
0,exchange_rate/short,chronos_bolt_tiny,1e-05,1e-05,0.002109,1.253411,0.003749,0.003753,19.888833,0.00312,0.004798,0.003244,0.004178,Econ/Fin,1
1,exchange_rate/medium,chronos_bolt_tiny,0.000283,0.000283,0.011907,8.691109,0.025486,0.024767,119.859031,0.016835,0.025405,0.017968,0.014884,Econ/Fin,1
2,exchange_rate/long,chronos_bolt_tiny,0.000903,0.000903,0.019534,10.271974,0.032043,0.031064,166.443321,0.030048,0.044899,0.029188,0.024416,Econ/Fin,1
3,ercot/short,chronos_bolt_tiny,442683.0,442683.0,314.393826,0.977523,0.053302,0.055636,6.788732,665.344272,0.127664,0.060325,0.048371,Energy,1
4,ercot/medium,chronos_bolt_tiny,700258.933333,700258.933333,409.076432,1.310945,0.078551,0.074039,18.131886,836.814754,0.16088,0.078646,0.067144,Energy,1


In [56]:
dataset_names = results_df["dataset"].unique()

In [57]:
results_df = results_df[["dataset", "domain", "num_variates", "model",
    "eval_metrics/MASE[0.5]", "eval_metrics/mean_weighted_sum_quantile_loss"]]
results_df = results_df.rename(columns={"eval_metrics/MASE[0.5]": "MASE"})
results_df = results_df.rename(columns={"eval_metrics/mean_weighted_sum_quantile_loss": "Qloss"})

In [58]:
results_df["MASE"] = pd.to_numeric(results_df["MASE"], errors="coerce")
results_df["rank_MASE"] = results_df.groupby(["dataset"])["MASE"].rank(method="min")

results_df["Qloss"] = pd.to_numeric(results_df["Qloss"], errors="coerce")
results_df["rank_Qloss"] = results_df.groupby(["dataset"])["Qloss"].rank(method="min")

results_df["Rank"] = results_df["rank_MASE"] + results_df["rank_Qloss"] / 2

results_df = results_df[results_df["dataset"] != "dataset"]

results_df.head()

Unnamed: 0,dataset,domain,num_variates,model,MASE,Qloss,rank_MASE,rank_Qloss,Rank
0,exchange_rate/short,Econ/Fin,1,chronos_bolt_tiny,1.253411,0.004178,7.0,5.0,9.5
1,exchange_rate/medium,Econ/Fin,1,chronos_bolt_tiny,8.691109,0.014884,2.0,2.0,3.0
2,exchange_rate/long,Econ/Fin,1,chronos_bolt_tiny,10.271974,0.024416,5.0,3.0,6.5
3,ercot/short,Energy,1,chronos_bolt_tiny,0.977523,0.048371,8.0,8.0,12.0
4,ercot/medium,Energy,1,chronos_bolt_tiny,1.310945,0.067144,7.0,7.0,10.5


In [59]:
def create_dataset_df(results_df, dataset_name):
    dataset_df = results_df[results_df["dataset"] == dataset_name]
    dataset_df = dataset_df.drop(columns=["dataset", "domain", "num_variates", "rank_MASE", "rank_Qloss"])
    dataset_df = dataset_df.rename(columns={"model": "Metric"})
    dataset_df = dataset_df.set_index("Metric")
    dataset_df = dataset_df.transpose()
    dataset_df = dataset_df.rename(columns={"chronos_bolt_base": "Chr.B.B",
                                            "chronos_bolt_small": "Chr.B.S",
                                            "chronos_bolt_mini": "Chr.B.M",
                                            "chronos-bolt-mini": "Chr.B.M",
                                            "chronos_bolt_tiny": "Chr.B.T",
                                            "timesfm1": "T.FM1",
                                            "timesfm2": "T.FM2",
                                            "moirai_small": "Moi.S",
                                            "moirai_base": "Moi.B",
                                            "moirai_large": "Moi.L",
                                            })
    dataset_df["Best"] = dataset_df.idxmin(axis=1) # add columns best
    return dataset_df

In [60]:
final_df

Metric,Dataset,Domain,Term,Frequency,Metric.1,Chr.B.B,Chr.B.S,Chr.B.M,Chr.B.T,T.FM1,T.FM2,Moi.S,Moi.B,Moi.L,Best
0,exchange_rate,Econ/Fin,short,D,MASE,1.249064,0.802832,1.105789,1.253411,0.962424,1.254216,1.207863,0.844398,1.668695,Chr.B.S
1,exchange_rate,Econ/Fin,short,D,Qloss,0.004579,0.003843,0.004379,0.004178,0.003686,0.003856,0.004574,0.003251,0.004371,Moi.B
2,exchange_rate,Econ/Fin,short,D,Rank,10.5,2.5,7.5,9.5,4.0,10.0,9.0,2.5,12.0,Chr.B.S
3,exchange_rate,Econ/Fin,medium,D,MASE,9.167733,9.054845,8.657556,8.691109,10.366673,9.286063,10.622544,8.779855,14.383419,Chr.B.M
4,exchange_rate,Econ/Fin,medium,D,Qloss,0.016081,0.014975,0.013126,0.014884,0.021322,0.015613,0.01769,0.015942,0.037142,Chr.B.M
5,exchange_rate,Econ/Fin,medium,D,Rank,8.0,5.5,1.5,3.0,11.0,8.0,11.5,5.5,13.5,Chr.B.M
6,exchange_rate,Econ/Fin,long,D,MASE,11.061553,9.700047,9.506321,10.271974,9.501345,11.013912,11.72432,10.213744,20.450645,T.FM1
7,exchange_rate,Econ/Fin,long,D,Qloss,0.028943,0.024171,0.021712,0.024416,0.027052,0.031435,0.030336,0.029356,0.073873,Chr.B.M
8,exchange_rate,Econ/Fin,long,D,Rank,9.5,4.0,2.5,6.5,3.0,10.0,11.5,7.0,13.5,Chr.B.M
9,ercot,Energy,short,H,MASE,0.708769,0.816378,0.892499,0.977523,0.727303,0.76708,1.047283,0.904736,0.862927,Chr.B.B


In [61]:
final_df = pd.DataFrame()

for name in dataset_names:
    dataset_df = create_dataset_df(results_df, name)
    dataset_df["dataset, term"] = name.replace("/", ", ")

    dataset_df = dataset_df.reset_index()
    dataset_df = dataset_df[["dataset, term", "index", "Chr.B.B", "Chr.B.S", "Chr.B.M",
        "Chr.B.T", "T.FM1", "T.FM2", "Moi.S", "Moi.B", "Moi.L", "Best"]]
    dataset_df = dataset_df.rename(columns={"index": "Metric"})
    dataset_df = dataset_df.rename_axis(index=" ")

    final_df = pd.concat([final_df, dataset_df], ignore_index=True)

In [62]:
final_df.head()

Metric,"dataset, term",Metric.1,Chr.B.B,Chr.B.S,Chr.B.M,Chr.B.T,T.FM1,T.FM2,Moi.S,Moi.B,Moi.L,Best
0,"exchange_rate, short",MASE,1.249064,0.802832,1.105789,1.253411,0.962424,1.254216,1.207863,0.844398,1.668695,Chr.B.S
1,"exchange_rate, short",Qloss,0.004579,0.003843,0.004379,0.004178,0.003686,0.003856,0.004574,0.003251,0.004371,Moi.B
2,"exchange_rate, short",Rank,10.5,2.5,7.5,9.5,4.0,10.0,9.0,2.5,12.0,Chr.B.S
3,"exchange_rate, medium",MASE,9.167733,9.054845,8.657556,8.691109,10.366673,9.286063,10.622544,8.779855,14.383419,Chr.B.M
4,"exchange_rate, medium",Qloss,0.016081,0.014975,0.013126,0.014884,0.021322,0.015613,0.01769,0.015942,0.037142,Chr.B.M


In [63]:
final_df.to_csv("results/chronos_benchmark.csv", index=False)

In [64]:
print(final_df.to_latex(index=False, float_format="%.3f"))

\begin{tabular}{llrrrrrrrrrl}
\toprule
dataset, term & Metric & Chr.B.B & Chr.B.S & Chr.B.M & Chr.B.T & T.FM1 & T.FM2 & Moi.S & Moi.B & Moi.L & Best \\
\midrule
exchange_rate, short & MASE & 1.249 & 0.803 & 1.106 & 1.253 & 0.962 & 1.254 & 1.208 & 0.844 & 1.669 & Chr.B.S \\
exchange_rate, short & Qloss & 0.005 & 0.004 & 0.004 & 0.004 & 0.004 & 0.004 & 0.005 & 0.003 & 0.004 & Moi.B \\
exchange_rate, short & Rank & 10.500 & 2.500 & 7.500 & 9.500 & 4.000 & 10.000 & 9.000 & 2.500 & 12.000 & Chr.B.S \\
exchange_rate, medium & MASE & 9.168 & 9.055 & 8.658 & 8.691 & 10.367 & 9.286 & 10.623 & 8.780 & 14.383 & Chr.B.M \\
exchange_rate, medium & Qloss & 0.016 & 0.015 & 0.013 & 0.015 & 0.021 & 0.016 & 0.018 & 0.016 & 0.037 & Chr.B.M \\
exchange_rate, medium & Rank & 8.000 & 5.500 & 1.500 & 3.000 & 11.000 & 8.000 & 11.500 & 5.500 & 13.500 & Chr.B.M \\
exchange_rate, long & MASE & 11.062 & 9.700 & 9.506 & 10.272 & 9.501 & 11.014 & 11.724 & 10.214 & 20.451 & T.FM1 \\
exchange_rate, long & Qloss & 0.0

### More results

In [65]:
import json

with open("data/chronos_dataset_properties.json") as f:
    dataset_properties_map = json.load(f)

In [66]:
final_df["Dataset"] = final_df["dataset, term"].apply(
    lambda x: x.split(", ")[0] if len(x.split(", ")) > 0 else "Unknown")
final_df["Domain"] = final_df["Dataset"].apply(lambda x: dataset_properties_map[x]["domain"]
    if x in dataset_properties_map else "Unknown")
final_df["Term"] = final_df["dataset, term"].apply(
    lambda x: x.split(", ")[1] if len(x.split(", ")) > 1 else "Unknown")
final_df["Frequency"] = final_df["Dataset"].apply(lambda x: dataset_properties_map[x]["freq"]
    if x in dataset_properties_map else "Unknown")

final_df = final_df[["Dataset", "Domain", "Term", "Frequency", "Metric",
    "Chr.B.B", "Chr.B.S", "Chr.B.M", "Chr.B.T", "T.FM1", "T.FM2",
    "Moi.S", "Moi.B", "Moi.L", "Best"]]

final_df.head()

Metric,Dataset,Domain,Term,Frequency,Metric.1,Chr.B.B,Chr.B.S,Chr.B.M,Chr.B.T,T.FM1,T.FM2,Moi.S,Moi.B,Moi.L,Best
0,exchange_rate,Econ/Fin,short,D,MASE,1.249064,0.802832,1.105789,1.253411,0.962424,1.254216,1.207863,0.844398,1.668695,Chr.B.S
1,exchange_rate,Econ/Fin,short,D,Qloss,0.004579,0.003843,0.004379,0.004178,0.003686,0.003856,0.004574,0.003251,0.004371,Moi.B
2,exchange_rate,Econ/Fin,short,D,Rank,10.5,2.5,7.5,9.5,4.0,10.0,9.0,2.5,12.0,Chr.B.S
3,exchange_rate,Econ/Fin,medium,D,MASE,9.167733,9.054845,8.657556,8.691109,10.366673,9.286063,10.622544,8.779855,14.383419,Chr.B.M
4,exchange_rate,Econ/Fin,medium,D,Qloss,0.016081,0.014975,0.013126,0.014884,0.021322,0.015613,0.01769,0.015942,0.037142,Chr.B.M


In [67]:
domain_results = final_df.groupby(["Domain", "Metric"]).agg({
    "Chr.B.B": "mean",
    "Chr.B.S": "mean",
    "Chr.B.M": "mean",
    "Chr.B.T": "mean",
    "T.FM1": "mean",
    "T.FM2": "mean",
    "Moi.S": "mean",
    "Moi.B": "mean",
    "Moi.L": "mean",
}).reset_index()
domain_results["Best"] = domain_results[["Chr.B.B", "Chr.B.S", "Chr.B.M", "Chr.B.T", "T.FM1", "T.FM2",
    "Moi.S", "Moi.B", "Moi.L",]].idxmin(axis=1)

domain_results

Metric,Domain,Metric.1,Chr.B.B,Chr.B.S,Chr.B.M,Chr.B.T,T.FM1,T.FM2,Moi.S,Moi.B,Moi.L,Best
0,Econ/Fin,MASE,7.15945,6.519241,6.423222,6.738831,6.94348,7.18473,7.851576,6.612666,12.167586,Chr.B.M
1,Econ/Fin,Qloss,0.016534,0.01433,0.013072,0.014493,0.017353,0.016968,0.017533,0.016183,0.038462,Chr.B.M
2,Econ/Fin,Rank,9.333333,4.0,3.833333,6.333333,6.0,9.333333,10.666667,5.0,13.0,Chr.B.M
3,Energy,MASE,1.126748,1.161694,1.157077,1.248273,1.317687,1.023733,1.259514,1.1886,1.211441,T.FM2
4,Energy,Qloss,0.056604,0.058837,0.057636,0.062885,0.069458,0.049861,0.061899,0.05807,0.056123,T.FM2
5,Energy,Rank,6.0,7.166667,6.5,10.666667,9.833333,2.5,9.166667,7.666667,8.0,T.FM2
6,Sales,MASE,1.029641,1.037098,1.031228,1.035654,1.414487,1.185088,1.03032,0.964464,0.959251,Moi.L
7,Sales,Qloss,0.396968,0.399536,0.398875,0.399514,0.485637,0.444709,0.389833,0.375848,0.375224,Moi.L
8,Sales,Rank,6.5,9.333333,6.833333,8.666667,13.5,11.666667,6.5,2.5,2.0,Moi.L


In [68]:
term_results = final_df.groupby(["Term", "Metric"]).agg({
    "Chr.B.B": "mean",
    "Chr.B.S": "mean",
    "Chr.B.M": "mean",
    "Chr.B.T": "mean",
    "T.FM1": "mean",
    "T.FM2": "mean",
    "Moi.S": "mean",
    "Moi.B": "mean",
    "Moi.L": "mean",
}).reset_index()
term_results["Best"] = term_results[["Chr.B.B", "Chr.B.S", "Chr.B.M", "Chr.B.T", "T.FM1", "T.FM2",
    "Moi.S", "Moi.B", "Moi.L",]].idxmin(axis=1)

term_results

Metric,Term,Metric.1,Chr.B.B,Chr.B.S,Chr.B.M,Chr.B.T,T.FM1,T.FM2,Moi.S,Moi.B,Moi.L,Best
0,long,MASE,4.611158,4.134889,4.050685,4.344733,4.373734,4.618908,4.837621,4.269034,7.687986,Chr.B.M
1,long,Qloss,0.192836,0.189235,0.186858,0.19023,0.241015,0.216317,0.18628,0.181696,0.194787,Moi.B
2,long,Rank,9.5,7.0,4.166667,8.0,10.0,7.833333,9.0,5.0,7.0,Chr.B.M
3,medium,MASE,3.814455,3.803877,3.655793,3.693864,4.452639,3.850924,4.29511,3.690985,5.581512,Chr.B.M
4,medium,Qloss,0.16062,0.162535,0.159844,0.16204,0.201466,0.172683,0.156885,0.152308,0.159622,Moi.B
5,medium,Rank,6.666667,8.333333,5.166667,7.333333,12.666667,7.166667,6.333333,5.333333,8.5,Chr.B.M
6,short,MASE,0.890226,0.779267,0.905048,0.984161,0.849281,0.92372,1.008679,0.805711,1.068781,Chr.B.S
7,short,Qloss,0.116649,0.120933,0.122882,0.124622,0.129967,0.122538,0.1261,0.116098,0.1154,Moi.L
8,short,Rank,5.666667,5.166667,7.833333,10.333333,6.666667,8.5,11.0,4.833333,7.5,Moi.B


In [69]:
frequency_results = final_df.groupby(["Frequency", "Metric"]).agg({
    "Chr.B.B": "mean",
    "Chr.B.S": "mean",
    "Chr.B.M": "mean",
    "Chr.B.T": "mean",
    "T.FM1": "mean",
    "T.FM2": "mean",
    "Moi.S": "mean",
    "Moi.B": "mean",
    "Moi.L": "mean",
}).reset_index()
frequency_results["Frequency"] = frequency_results["Frequency"].map({
    "H": "1 hour",
    "D": "1 day",
    "W": "1 week",
})
# sort by frequency
frequency_results = frequency_results.sort_values(by=["Frequency", "Metric"])

frequency_results["Best"] = frequency_results[["Chr.B.B", "Chr.B.S", "Chr.B.M", "Chr.B.T", "T.FM1", "T.FM2",
    "Moi.S", "Moi.B", "Moi.L"]].idxmin(axis=1)

frequency_results # same as domain

Metric,Frequency,Metric.1,Chr.B.B,Chr.B.S,Chr.B.M,Chr.B.T,T.FM1,T.FM2,Moi.S,Moi.B,Moi.L,Best
0,1 day,MASE,7.15945,6.519241,6.423222,6.738831,6.94348,7.18473,7.851576,6.612666,12.167586,Chr.B.M
1,1 day,Qloss,0.016534,0.01433,0.013072,0.014493,0.017353,0.016968,0.017533,0.016183,0.038462,Chr.B.M
2,1 day,Rank,9.333333,4.0,3.833333,6.333333,6.0,9.333333,10.666667,5.0,13.0,Chr.B.M
3,1 hour,MASE,1.126748,1.161694,1.157077,1.248273,1.317687,1.023733,1.259514,1.1886,1.211441,T.FM2
4,1 hour,Qloss,0.056604,0.058837,0.057636,0.062885,0.069458,0.049861,0.061899,0.05807,0.056123,T.FM2
5,1 hour,Rank,6.0,7.166667,6.5,10.666667,9.833333,2.5,9.166667,7.666667,8.0,T.FM2
6,1 week,MASE,1.029641,1.037098,1.031228,1.035654,1.414487,1.185088,1.03032,0.964464,0.959251,Moi.L
7,1 week,Qloss,0.396968,0.399536,0.398875,0.399514,0.485637,0.444709,0.389833,0.375848,0.375224,Moi.L
8,1 week,Rank,6.5,9.333333,6.833333,8.666667,13.5,11.666667,6.5,2.5,2.0,Moi.L


In [70]:
print(domain_results.to_latex(index=False, float_format="%.3f"))

\begin{tabular}{llrrrrrrrrrl}
\toprule
Domain & Metric & Chr.B.B & Chr.B.S & Chr.B.M & Chr.B.T & T.FM1 & T.FM2 & Moi.S & Moi.B & Moi.L & Best \\
\midrule
Econ/Fin & MASE & 7.159 & 6.519 & 6.423 & 6.739 & 6.943 & 7.185 & 7.852 & 6.613 & 12.168 & Chr.B.M \\
Econ/Fin & Qloss & 0.017 & 0.014 & 0.013 & 0.014 & 0.017 & 0.017 & 0.018 & 0.016 & 0.038 & Chr.B.M \\
Econ/Fin & Rank & 9.333 & 4.000 & 3.833 & 6.333 & 6.000 & 9.333 & 10.667 & 5.000 & 13.000 & Chr.B.M \\
Energy & MASE & 1.127 & 1.162 & 1.157 & 1.248 & 1.318 & 1.024 & 1.260 & 1.189 & 1.211 & T.FM2 \\
Energy & Qloss & 0.057 & 0.059 & 0.058 & 0.063 & 0.069 & 0.050 & 0.062 & 0.058 & 0.056 & T.FM2 \\
Energy & Rank & 6.000 & 7.167 & 6.500 & 10.667 & 9.833 & 2.500 & 9.167 & 7.667 & 8.000 & T.FM2 \\
Sales & MASE & 1.030 & 1.037 & 1.031 & 1.036 & 1.414 & 1.185 & 1.030 & 0.964 & 0.959 & Moi.L \\
Sales & Qloss & 0.397 & 0.400 & 0.399 & 0.400 & 0.486 & 0.445 & 0.390 & 0.376 & 0.375 & Moi.L \\
Sales & Rank & 6.500 & 9.333 & 6.833 & 8.667 & 13.500

In [71]:
print(term_results.to_latex(index=False, float_format="%.3f"))

\begin{tabular}{llrrrrrrrrrl}
\toprule
Term & Metric & Chr.B.B & Chr.B.S & Chr.B.M & Chr.B.T & T.FM1 & T.FM2 & Moi.S & Moi.B & Moi.L & Best \\
\midrule
long & MASE & 4.611 & 4.135 & 4.051 & 4.345 & 4.374 & 4.619 & 4.838 & 4.269 & 7.688 & Chr.B.M \\
long & Qloss & 0.193 & 0.189 & 0.187 & 0.190 & 0.241 & 0.216 & 0.186 & 0.182 & 0.195 & Moi.B \\
long & Rank & 9.500 & 7.000 & 4.167 & 8.000 & 10.000 & 7.833 & 9.000 & 5.000 & 7.000 & Chr.B.M \\
medium & MASE & 3.814 & 3.804 & 3.656 & 3.694 & 4.453 & 3.851 & 4.295 & 3.691 & 5.582 & Chr.B.M \\
medium & Qloss & 0.161 & 0.163 & 0.160 & 0.162 & 0.201 & 0.173 & 0.157 & 0.152 & 0.160 & Moi.B \\
medium & Rank & 6.667 & 8.333 & 5.167 & 7.333 & 12.667 & 7.167 & 6.333 & 5.333 & 8.500 & Chr.B.M \\
short & MASE & 0.890 & 0.779 & 0.905 & 0.984 & 0.849 & 0.924 & 1.009 & 0.806 & 1.069 & Chr.B.S \\
short & Qloss & 0.117 & 0.121 & 0.123 & 0.125 & 0.130 & 0.123 & 0.126 & 0.116 & 0.115 & Moi.L \\
short & Rank & 5.667 & 5.167 & 7.833 & 10.333 & 6.667 & 8.500 & 1