In [1]:
import pandas as pd

In [2]:
def create_rank(df, metric_name):
    """Add per-year rank columns for ``metric_name`` to ``df``.

    Two columns are written to ``df`` in place, and ``df`` itself is returned:

    * ``{metric_name}_rank`` -- rank of the metric within each ``year``,
      ascending (smallest value gets rank 1), with ties sharing the lowest
      rank (``method="min"``).
    * ``{metric_name}_rank_gt_naive`` -- number of rows with
      ``model_name == "naive"`` in the same year whose rank is less than or
      equal to this row's rank. With one naive row per year this is 0 for
      models ranked strictly ahead of naive and 1 for naive itself and every
      model ranked at or behind it. Years without a naive row get 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``"year"``, ``"model_name"`` and
        ``metric_name``. It is modified in place.
    metric_name : str
        Name of the column to rank.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the two columns added (or overwritten).

    Notes
    -----
    A missing metric value yields a NaN rank, which the integer cast rejects.
    """
    rank_col = f"{metric_name}_rank"
    flag_col = f"{metric_name}_rank_gt_naive"

    df[rank_col] = (
        df.groupby("year")[metric_name].rank(method="min", ascending=True).astype(int)
    )

    # Walk each year from best to worst rank and count the naive rows seen so far.
    ordered = df.sort_values(["year", rank_col])
    years = ordered["year"]
    naive_seen = (ordered["model_name"] == "naive").groupby(years).cumsum()

    # Rows tied on rank sit in arbitrary relative order after the sort, so a
    # plain running count would depend on row position. Taking the maximum
    # within each (year, rank) group gives every tied row the same value.
    naive_seen = naive_seen.groupby([years, ordered[rank_col]]).transform("max")

    # Assignment aligns on the index, restoring the caller's row order.
    df[flag_col] = naive_seen.astype(int)

    return df

In [3]:
# Load the raw results, then build the per-model, per-year summary in a single
# chain: aggregate the scores, add rank columns for each score (bs, rps, ls,
# in that order), and sort by Brier-score rank and then year.
df = pd.read_csv("../real_data/results/metrics.csv")
metrics = (
    df.groupby(["model_name", "year"])
    .agg(
        num_rounds=("num_rounds", "nunique"),
        bs=("brier_score", "mean"),
        rps=("ranked_probability_score", "mean"),
        ls=("log_score", "mean"),
    )
    .reset_index()
    .pipe(create_rank, "bs")
    .pipe(create_rank, "rps")
    .pipe(create_rank, "ls")
    .sort_values(by=["bs_rank", "year"], ignore_index=True)
)

# Display order: identifiers, mean scores, ranks, then the naive-comparison flags.
cols = [
    "model_name", "year", "num_rounds",
    "bs", "rps", "ls",
    "bs_rank", "rps_rank", "ls_rank",
    "bs_rank_gt_naive", "rps_rank_gt_naive", "ls_rank_gt_naive",
]

metrics[cols].head(18)

Unnamed: 0,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,poisson_2,2019,5,0.610564,0.207664,1.020872,1,1,1,0,0,0
1,poisson_2,2020,5,0.637703,0.220147,1.057506,1,1,1,0,0,0
2,poisson_2,2021,5,0.640085,0.221342,1.059714,1,1,1,0,0,0
3,poisson_2,2022,5,0.627538,0.216216,1.044095,1,1,1,0,0,0
4,poisson_2,2023,5,0.63677,0.222594,1.056034,1,1,1,0,0,0
5,poisson_2,2024,5,0.643812,0.223571,1.06643,1,1,1,0,0,0
6,poisson_1,2019,5,0.629171,0.216324,1.046633,2,2,2,0,0,0
7,poisson_4,2020,5,0.649801,0.224667,1.076019,2,2,2,0,0,0
8,poisson_1,2021,5,0.653944,0.227952,1.079346,2,3,2,0,0,0
9,poisson_4,2022,5,0.640192,0.222439,1.062938,2,2,2,0,0,0


In [4]:
# Rows for the poisson_2 model, restricted to the report columns.
metrics.loc[metrics["model_name"] == "poisson_2", cols]

Unnamed: 0,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,poisson_2,2019,5,0.610564,0.207664,1.020872,1,1,1,0,0,0
1,poisson_2,2020,5,0.637703,0.220147,1.057506,1,1,1,0,0,0
2,poisson_2,2021,5,0.640085,0.221342,1.059714,1,1,1,0,0,0
3,poisson_2,2022,5,0.627538,0.216216,1.044095,1,1,1,0,0,0
4,poisson_2,2023,5,0.63677,0.222594,1.056034,1,1,1,0,0,0
5,poisson_2,2024,5,0.643812,0.223571,1.06643,1,1,1,0,0,0


In [5]:
# Rows for the poisson_5 model, restricted to the report columns.
metrics.loc[metrics["model_name"] == "poisson_5", cols]

Unnamed: 0,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
30,poisson_5,2019,5,0.66623,0.23134,1.105846,6,6,7,0,0,1
43,poisson_5,2020,5,0.673397,0.23731,1.108752,8,8,8,1,1,1
44,poisson_5,2021,5,0.683126,0.238448,1.122694,8,8,8,1,1,1
45,poisson_5,2022,5,0.671462,0.237257,1.114822,8,8,8,1,1,1
46,poisson_5,2023,5,0.680091,0.243451,1.119732,8,8,8,1,1,1
47,poisson_5,2024,5,0.703079,0.249199,1.156926,8,8,8,1,1,1
