In [1]:
import pandas as pd

In [2]:
def create_rank(df, metric_name):
    """Add per-year rank columns for ``metric_name`` and return the result.

    Two columns are added:

    * ``{metric_name}_rank`` -- rank of ``metric_name`` within each ``year``,
      ascending (smallest value gets rank 1). Ties share the smallest rank
      (``method="min"``).
    * ``{metric_name}_rank_gt_naive`` -- running count, within each ``year``
      and walking rows in rank order, of rows whose ``model_name`` is
      ``"naive"``. Rows ranked ahead of the naive row get 0; the naive row
      and every row after it get a positive count.

    The input frame is left untouched; a copy carrying the new columns is
    returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``"year"``, ``"model_name"`` and ``metric_name`` columns.
    metric_name : str
        Name of the column to rank.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the two columns described above.

    Notes
    -----
    Ranks are cast to ``int``, so a missing metric value (which yields a NaN
    rank) makes the cast fail rather than silently producing a bogus rank.
    """
    rank_col = f"{metric_name}_rank"
    naive_col = f"{metric_name}_rank_gt_naive"

    # Work on a copy so the caller's frame is not modified as a side effect.
    ranked = df.copy()
    ranked[rank_col] = (
        ranked.groupby("year")[metric_name].rank(method="min", ascending=True).astype(int)
    )

    # Flag the naive rows on a temporary frame, order by (year, rank) and
    # accumulate the flag per year. The resulting Series keeps the original
    # index labels, so assigning it back realigns it to the unsorted rows.
    naive_seen = (
        ranked.assign(**{naive_col: ranked["model_name"] == "naive"})
        .sort_values(["year", rank_col])
        .groupby("year")[naive_col]
        .cumsum()
        .astype(int)
    )
    ranked[naive_col] = naive_seen

    return ranked

In [3]:
# Load the stored evaluation metrics.
df = pd.read_csv("../real_data/results/metrics.csv")

# One row per (model_name, year): count the distinct num_rounds values and
# average each score, then attach the per-year ranks for every score and
# order the table by Brier-score rank, then year.
metrics = (
    df.groupby(["model_name", "year"])
    .agg(
        num_rounds=("num_rounds", "nunique"),
        bs=("brier_score", "mean"),
        rps=("ranked_probability_score", "mean"),
        ls=("log_score", "mean"),
    )
    .reset_index()
    .pipe(create_rank, "bs")
    .pipe(create_rank, "rps")
    .pipe(create_rank, "ls")
    .sort_values(by=["bs_rank", "year"], ignore_index=True)
)

# Display order: identifiers, raw scores, ranks, naive-relative flags.
cols = (
    ["model_name", "year", "num_rounds", "bs", "rps", "ls"]
    + [f"{score}_rank" for score in ("bs", "rps", "ls")]
    + [f"{score}_rank_gt_naive" for score in ("bs", "rps", "ls")]
)

metrics.loc[:, cols].head(18)

Unnamed: 0,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,poisson_2,2019,5,0.610386,0.207571,1.020561,1,1,1,0,0,0
1,poisson_2,2020,5,0.637875,0.220218,1.057718,1,1,1,0,0,0
2,poisson_2,2021,5,0.640098,0.221308,1.0597,1,1,1,0,0,0
3,poisson_2,2022,5,0.627416,0.216166,1.043888,1,1,1,0,0,0
4,poisson_2,2023,5,0.636559,0.222511,1.055645,1,1,1,0,0,0
5,poisson_2,2024,5,0.644047,0.223656,1.066786,1,1,1,0,0,0
6,poisson_1,2019,5,0.629037,0.216234,1.046402,2,2,2,0,0,0
7,poisson_4,2020,5,0.649856,0.224753,1.076066,2,2,2,0,0,0
8,poisson_1,2021,5,0.65429,0.228057,1.079825,2,3,2,0,0,0
9,poisson_4,2022,5,0.640243,0.222466,1.063048,2,2,2,0,0,0


In [4]:
# Rows for the poisson_2 model only, restricted to the display columns.
metrics.loc[metrics["model_name"] == "poisson_2", cols]

Unnamed: 0,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,poisson_2,2019,5,0.610386,0.207571,1.020561,1,1,1,0,0,0
1,poisson_2,2020,5,0.637875,0.220218,1.057718,1,1,1,0,0,0
2,poisson_2,2021,5,0.640098,0.221308,1.0597,1,1,1,0,0,0
3,poisson_2,2022,5,0.627416,0.216166,1.043888,1,1,1,0,0,0
4,poisson_2,2023,5,0.636559,0.222511,1.055645,1,1,1,0,0,0
5,poisson_2,2024,5,0.644047,0.223656,1.066786,1,1,1,0,0,0


In [5]:
# Rows for the poisson_5 model only, restricted to the display columns.
metrics.loc[metrics["model_name"] == "poisson_5", cols]

Unnamed: 0,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
30,poisson_5,2019,5,0.666089,0.231273,1.105566,6,6,7,0,0,1
43,poisson_5,2020,5,0.673566,0.237384,1.108957,8,8,8,1,1,1
44,poisson_5,2021,5,0.683198,0.238559,1.122741,8,8,8,1,1,1
45,poisson_5,2022,5,0.671129,0.237099,1.114106,8,8,8,1,1,1
46,poisson_5,2023,5,0.680534,0.243684,1.120308,8,8,8,1,1,1
47,poisson_5,2024,5,0.703138,0.249254,1.157096,8,8,8,1,1,1


In [6]:
# Full ranked table, restricted to the display columns.
metrics.loc[:, cols]

Unnamed: 0,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,poisson_2,2019,5,0.610386,0.207571,1.020561,1,1,1,0,0,0
1,poisson_2,2020,5,0.637875,0.220218,1.057718,1,1,1,0,0,0
2,poisson_2,2021,5,0.640098,0.221308,1.0597,1,1,1,0,0,0
3,poisson_2,2022,5,0.627416,0.216166,1.043888,1,1,1,0,0,0
4,poisson_2,2023,5,0.636559,0.222511,1.055645,1,1,1,0,0,0
5,poisson_2,2024,5,0.644047,0.223656,1.066786,1,1,1,0,0,0
6,poisson_1,2019,5,0.629037,0.216234,1.046402,2,2,2,0,0,0
7,poisson_4,2020,5,0.649856,0.224753,1.076066,2,2,2,0,0,0
8,poisson_1,2021,5,0.65429,0.228057,1.079825,2,3,2,0,0,0
9,poisson_4,2022,5,0.640243,0.222466,1.063048,2,2,2,0,0,0
