In [1]:
import pandas as pd

In [2]:
def create_rank(df, metric_name):
    """Rank one metric within each (championship, year) and flag rows at or below naive.

    Two integer columns are written onto ``df`` (the frame is modified in
    place and also returned):

    * ``{metric_name}_rank`` -- 1-based rank of ``metric_name`` inside the
      row's ``championship``/``year`` group, smallest value first. Tied
      values share the lowest rank of the tie (``method="min"``).
    * ``{metric_name}_rank_gt_naive`` -- 1 when the row's rank is equal to or
      greater than the rank of the ``model_name == "naive"`` row of the same
      group, otherwise 0. The naive row itself therefore gets 1, and a group
      that has no naive row gets 0 everywhere.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``championship``, ``year``, ``model_name`` and
        ``metric_name``.
    metric_name : str
        Name of the column to rank; also the prefix of the new columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the two columns added or overwritten.

    Notes
    -----
    A missing metric value produces a missing rank, which the integer cast
    rejects with an error.
    """
    rank_col = f"{metric_name}_rank"
    flag_col = f"{metric_name}_rank_gt_naive"

    df[rank_col] = (
        df.groupby(["championship", "year"])[metric_name]
        .rank(method="min", ascending=True)
        .astype(int)
    )

    # Broadcast the naive model's rank to every row of its group. Rows that
    # are not naive are masked out first, so a group without a naive row ends
    # up with NaN here and every comparison below is False for it.
    naive_rank = (
        df[rank_col]
        .where(df["model_name"] == "naive")
        .groupby([df["championship"], df["year"]])
        .transform("min")
    )

    # Comparing ranks directly (rather than cumulating a marker over a sorted
    # frame) makes ties with naive deterministic: every model sharing naive's
    # rank is flagged, regardless of the order of the rows in ``df``.
    df[flag_col] = (df[rank_col] >= naive_rank).astype(int)

    return df

In [3]:
# Read the evaluation results from disk.
df = pd.read_csv("../real_data/results/metrics.csv")

# Restrict to the 2023 and 2024 rows, then summarise every
# (championship, model, year) combination.
season_mask = df["year"].isin([2023, 2024])
metrics = (
    df.loc[season_mask]
    .groupby(["championship", "model_name", "year"])
    .agg(
        num_rounds=("num_rounds", "nunique"),
        bs=("brier_score", "mean"),
        rps=("ranked_probability_score", "mean"),
        ls=("log_score", "mean"),
    )
    .reset_index()
)

# Attach the rank columns for each averaged score in turn.
for score in ("bs", "rps", "ls"):
    metrics = create_rank(metrics, score)

metrics = metrics.sort_values(by=["bs_rank", "year"], ignore_index=True)

# Column order for display; the later cells reuse this list.
cols = ["championship", "model_name", "year", "num_rounds"]
cols += ["bs", "rps", "ls"]
cols += ["bs_rank", "rps_rank", "ls_rank"]
cols += ["bs_rank_gt_naive", "rps_rank_gt_naive", "ls_rank_gt_naive"]

metrics[cols].head(16)

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,brazil,poisson_2,2023,5,0.636963,0.22266,1.05633,1,1,1,0,0,0
1,england,bradley_terry_4,2023,5,0.586022,0.203931,0.984568,1,2,1,0,0,0
2,france,poisson_1,2023,5,0.640247,0.227319,1.064071,1,1,1,0,0,0
3,germany,poisson_1,2023,5,0.585212,0.196162,0.984302,1,1,1,0,0,0
4,italy,poisson_2,2023,5,0.61085,0.197724,1.01802,1,1,1,0,0,0
5,netherlands,poisson_2,2023,5,0.555518,0.182842,0.939529,1,1,1,0,0,0
6,portugal,bradley_terry_3,2023,5,0.58117,0.194929,0.978762,1,1,1,0,0,0
7,spain,poisson_2,2023,5,0.593093,0.198861,0.993205,1,1,1,0,0,0
8,brazil,poisson_2,2024,5,0.64449,0.223845,1.067396,1,1,1,0,0,0
9,england,bradley_terry_3,2024,5,0.595411,0.209399,0.994086,1,2,1,0,0,0


In [4]:
# Every (championship, year) row of the poisson_2 model.
metrics.loc[metrics["model_name"].eq("poisson_2"), cols]

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,brazil,poisson_2,2023,5,0.636963,0.22266,1.05633,1,1,1,0,0,0
4,italy,poisson_2,2023,5,0.61085,0.197724,1.01802,1,1,1,0,0,0
5,netherlands,poisson_2,2023,5,0.555518,0.182842,0.939529,1,1,1,0,0,0
7,spain,poisson_2,2023,5,0.593093,0.198861,0.993205,1,1,1,0,0,0
8,brazil,poisson_2,2024,5,0.64449,0.223845,1.067396,1,1,1,0,0,0
10,france,poisson_2,2024,5,0.578007,0.215011,0.977628,1,1,1,0,0,0
13,netherlands,poisson_2,2024,5,0.61202,0.206165,1.018922,1,1,1,0,0,0
15,spain,poisson_2,2024,5,0.594461,0.205064,0.997053,1,1,1,0,0,0
18,france,poisson_2,2023,5,0.653501,0.235665,1.081951,2,2,2,0,0,0
33,england,poisson_2,2023,5,0.589183,0.205617,0.987367,3,3,3,0,0,0


In [5]:
# Every (championship, year) row of the poisson_5 model.
metrics.loc[metrics["model_name"].eq("poisson_5"), cols]

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
112,brazil,poisson_5,2023,5,0.680268,0.243485,1.119888,8,8,8,1,1,1
113,england,poisson_5,2023,5,0.692493,0.253903,1.146212,8,8,8,1,1,1
114,france,poisson_5,2023,5,0.717873,0.266644,1.188923,8,8,8,1,1,1
115,germany,poisson_5,2023,5,0.707801,0.243882,1.170084,8,8,8,1,1,1
116,italy,poisson_5,2023,5,0.705905,0.242561,1.169616,8,8,8,1,1,1
117,netherlands,poisson_5,2023,5,0.671736,0.237895,1.108494,8,8,8,1,1,1
118,portugal,poisson_5,2023,5,0.697625,0.249318,1.148747,8,8,8,1,1,1
119,spain,poisson_5,2023,5,0.702682,0.250115,1.162232,8,8,8,1,1,1
120,brazil,poisson_5,2024,5,0.703951,0.249685,1.158673,8,8,8,1,1,1
121,england,poisson_5,2024,5,0.714321,0.269553,1.170378,8,8,8,1,1,1


In [6]:
# Every (championship, year) row of the naive baseline.
metrics.loc[metrics["model_name"].eq("naive"), cols]

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
50,france,naive,2023,5,0.666667,0.240078,1.098612,4,4,3,1,1,1
56,brazil,naive,2024,5,0.666667,0.233195,1.098612,4,4,4,1,1,1
75,germany,naive,2024,5,0.666667,0.231678,1.098612,5,5,5,1,1,1
76,italy,naive,2024,5,0.666667,0.231895,1.098612,5,6,5,1,1,1
78,portugal,naive,2024,5,0.666667,0.233957,1.098612,5,6,5,1,1,1
81,england,naive,2023,5,0.666667,0.239057,1.098612,6,6,7,1,1,1
83,germany,naive,2023,5,0.666667,0.230878,1.098612,6,6,5,1,1,1
84,italy,naive,2023,5,0.666667,0.226348,1.098612,6,6,5,1,1,1
93,netherlands,naive,2024,5,0.666667,0.231272,1.098612,6,6,6,1,1,1
95,spain,naive,2024,5,0.666667,0.23634,1.098612,6,6,6,1,1,1


In [7]:
# One row per model: total num_rounds and the mean of each averaged score
# over all (championship, year) rows, ordered by mean bs (smallest first).
(
    metrics.groupby("model_name")
    .agg(
        num_rounds=("num_rounds", "sum"),
        **{score: (score, "mean") for score in ("bs", "rps", "ls")},
    )
    .reset_index()
    .sort_values(by="bs", ignore_index=True)
)

Unnamed: 0,model_name,num_rounds,bs,rps,ls
0,poisson_2,80,0.610633,0.210251,1.018975
1,poisson_1,80,0.610825,0.209864,1.019823
2,bradley_terry_3,80,0.612971,0.210693,1.024966
3,bradley_terry_4,80,0.613169,0.210774,1.025692
4,poisson_4,80,0.657366,0.229253,1.089806
5,naive,80,0.666667,0.234959,1.098612
6,poisson_3,80,0.670464,0.235598,1.102677
7,poisson_5,80,0.699338,0.250923,1.154573
