In [1]:
import pandas as pd

In [2]:
def create_rank(df, metric_name):
    """Add rank columns for ``metric_name`` within each (championship, year).

    Two columns are written to ``df``:

    * ``{metric_name}_rank`` -- integer rank of the metric inside its
      ``(championship, year)`` group. The smallest value gets rank 1 and
      tied values share the lowest rank (``method="min"``).
    * ``{metric_name}_rank_gt_naive`` -- 1 when the row's rank is at or below
      the rank of the row whose ``model_name`` is ``"naive"`` in the same
      group (the naive row itself is flagged 1), otherwise 0. Groups that
      contain no ``"naive"`` row are flagged 0 throughout.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``championship``, ``year``, ``model_name``
        and ``metric_name``. It is modified in place.
    metric_name : str
        Name of the metric column to rank.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the two columns added or overwritten.

    Notes
    -----
    The metric must not contain missing values: a NaN rank cannot be cast to
    ``int``.
    """
    group_keys = ["championship", "year"]
    rank_col = f"{metric_name}_rank"
    flag_col = f"{metric_name}_rank_gt_naive"

    df[rank_col] = (
        df.groupby(group_keys)[metric_name]
        .rank(method="min", ascending=True)
        .astype(int)
    )

    # Broadcast the naive model's rank to every row of its group. Rows of a
    # group without a naive entry get NaN, which compares as False below.
    naive_rank = (
        df.assign(_naive_rank=df[rank_col].where(df["model_name"] == "naive"))
        .groupby(group_keys)["_naive_rank"]
        .transform("min")
    )

    # Comparing ranks directly (instead of a cumulative sum over sorted rows)
    # keeps the flag independent of row order, so a model tied with the naive
    # baseline is always flagged the same way.
    df[flag_col] = (df[rank_col] >= naive_rank).astype(int)

    return df

In [3]:
df = pd.read_csv("../real_data/results/metrics.csv")

# Keep only the years 2023 and 2024, then summarise every
# (championship, model, year) combination: number of distinct rounds and the
# mean of each score.
metrics = (
    df.loc[df["year"].isin([2023, 2024])]
    .groupby(["championship", "model_name", "year"])
    .agg(
        **{
            "num_rounds": ("num_rounds", "nunique"),
            "bs": ("brier_score", "mean"),
            "rps": ("ranked_probability_score", "mean"),
            "ls": ("log_score", "mean"),
        }
    )
    .reset_index()
)

# Add the rank columns for each score in turn.
score_names = ["bs", "rps", "ls"]
for score_name in score_names:
    metrics = create_rank(metrics, score_name)

metrics = metrics.sort_values(by=["bs_rank", "year"], ignore_index=True)

# Display order: identifiers, raw scores, ranks, then the naive-baseline flags.
cols = (
    ["championship", "model_name", "year", "num_rounds"]
    + score_names
    + [f"{name}_rank" for name in score_names]
    + [f"{name}_rank_gt_naive" for name in score_names]
)

metrics[cols].head(16)

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,brazil,poisson_2,2023,5,0.636963,0.22266,1.05633,1,1,1,0,0,0
1,england,bradley_terry_4,2023,5,0.586022,0.203931,0.984568,1,2,1,0,0,0
2,france,poisson_1,2023,5,0.640247,0.227319,1.064071,1,1,1,0,0,0
3,germany,poisson_1,2023,5,0.585212,0.196162,0.984302,1,1,1,0,0,0
4,italy,poisson_2,2023,5,0.61085,0.197724,1.01802,1,1,1,0,0,0
5,netherlands,poisson_2,2023,5,0.555518,0.182842,0.939529,1,1,1,0,0,0
6,portugal,bradley_terry_3,2023,5,0.58117,0.194929,0.978762,1,1,1,0,0,0
7,spain,poisson_2,2023,5,0.593093,0.198861,0.993205,1,1,1,0,0,0
8,brazil,poisson_2,2024,5,0.64449,0.223845,1.067396,1,1,1,0,0,0
9,england,bradley_terry_3,2024,5,0.595411,0.209399,0.994086,1,2,1,0,0,0


In [4]:
# Rows for the poisson_2 model, restricted to the display columns.
metrics.loc[metrics["model_name"] == "poisson_2", cols]

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,brazil,poisson_2,2023,5,0.636963,0.22266,1.05633,1,1,1,0,0,0
4,italy,poisson_2,2023,5,0.61085,0.197724,1.01802,1,1,1,0,0,0
5,netherlands,poisson_2,2023,5,0.555518,0.182842,0.939529,1,1,1,0,0,0
7,spain,poisson_2,2023,5,0.593093,0.198861,0.993205,1,1,1,0,0,0
8,brazil,poisson_2,2024,5,0.64449,0.223845,1.067396,1,1,1,0,0,0
10,france,poisson_2,2024,5,0.578007,0.215011,0.977628,1,1,1,0,0,0
13,netherlands,poisson_2,2024,5,0.61202,0.206165,1.018922,1,1,1,0,0,0
15,spain,poisson_2,2024,5,0.594461,0.205064,0.997053,1,1,1,0,0,0
18,france,poisson_2,2023,5,0.653501,0.235665,1.081951,2,2,2,0,0,0
33,england,poisson_2,2023,5,0.589183,0.205617,0.987367,3,3,3,0,0,0


In [5]:
# Rows for the poisson_5 model, restricted to the display columns.
metrics.loc[metrics["model_name"] == "poisson_5", cols]

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
112,brazil,poisson_5,2023,5,0.680268,0.243485,1.119888,8,8,8,1,1,1
113,england,poisson_5,2023,5,0.692493,0.253903,1.146212,8,8,8,1,1,1
114,france,poisson_5,2023,5,0.717873,0.266644,1.188923,8,8,8,1,1,1
115,germany,poisson_5,2023,5,0.707801,0.243882,1.170084,8,8,8,1,1,1
116,italy,poisson_5,2023,5,0.705905,0.242561,1.169616,8,8,8,1,1,1
117,netherlands,poisson_5,2023,5,0.671736,0.237895,1.108494,8,8,8,1,1,1
118,portugal,poisson_5,2023,5,0.697625,0.249318,1.148747,8,8,8,1,1,1
119,spain,poisson_5,2023,5,0.702682,0.250115,1.162232,8,8,8,1,1,1
120,brazil,poisson_5,2024,5,0.703951,0.249685,1.158673,8,8,8,1,1,1
121,england,poisson_5,2024,5,0.714321,0.269553,1.170378,8,8,8,1,1,1


In [6]:
# Rows for the naive model, restricted to the display columns.
metrics.loc[metrics["model_name"] == "naive", cols]

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
50,france,naive,2023,5,0.666667,0.240078,1.098612,4,4,3,1,1,1
56,brazil,naive,2024,5,0.666667,0.233195,1.098612,4,4,4,1,1,1
75,germany,naive,2024,5,0.666667,0.231678,1.098612,5,5,5,1,1,1
76,italy,naive,2024,5,0.666667,0.231895,1.098612,5,6,5,1,1,1
78,portugal,naive,2024,5,0.666667,0.233957,1.098612,5,6,5,1,1,1
81,england,naive,2023,5,0.666667,0.239057,1.098612,6,6,7,1,1,1
83,germany,naive,2023,5,0.666667,0.230878,1.098612,6,6,5,1,1,1
84,italy,naive,2023,5,0.666667,0.226348,1.098612,6,6,5,1,1,1
93,netherlands,naive,2024,5,0.666667,0.231272,1.098612,6,6,6,1,1,1
95,spain,naive,2024,5,0.666667,0.23634,1.098612,6,6,6,1,1,1


## Brazil

In [7]:
# Brazil only, ordered by year and then by Brier-score rank.
(
    metrics.loc[metrics["championship"] == "brazil", cols]
    .sort_values(by=["year", "bs_rank"], ignore_index=True)
)

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,brazil,poisson_2,2023,5,0.636963,0.22266,1.05633,1,1,1,0,0,0
1,brazil,poisson_4,2023,5,0.643933,0.225537,1.068225,2,2,2,0,0,0
2,brazil,poisson_1,2023,5,0.657986,0.231807,1.086306,3,3,3,0,0,0
3,brazil,bradley_terry_3,2023,5,0.66006,0.234937,1.090723,4,4,4,0,0,0
4,brazil,bradley_terry_4,2023,5,0.661226,0.235261,1.09348,5,6,5,0,0,0
5,brazil,poisson_3,2023,5,0.664127,0.235189,1.093489,6,5,6,0,0,0
6,brazil,naive,2023,5,0.666667,0.235593,1.098612,7,7,7,1,1,1
7,brazil,poisson_5,2023,5,0.680268,0.243485,1.119888,8,8,8,1,1,1
8,brazil,poisson_2,2024,5,0.64449,0.223845,1.067396,1,1,1,0,0,0
9,brazil,poisson_1,2024,5,0.659966,0.231365,1.091036,2,3,2,0,0,0


## England

In [8]:
# England only, ordered by year and then by Brier-score rank.
(
    metrics.loc[metrics["championship"] == "england", cols]
    .sort_values(by=["year", "bs_rank"], ignore_index=True)
)

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,england,bradley_terry_4,2023,5,0.586022,0.203931,0.984568,1,2,1,0,0,0
1,england,bradley_terry_3,2023,5,0.586057,0.203911,0.984951,2,1,2,0,0,0
2,england,poisson_2,2023,5,0.589183,0.205617,0.987367,3,3,3,0,0,0
3,england,poisson_1,2023,5,0.594263,0.207176,0.997073,4,4,4,0,0,0
4,england,poisson_4,2023,5,0.646627,0.229044,1.075106,5,5,5,0,0,0
5,england,naive,2023,5,0.666667,0.239057,1.098612,6,6,7,1,1,1
6,england,poisson_3,2023,5,0.667469,0.239232,1.097894,7,7,6,1,1,0
7,england,poisson_5,2023,5,0.692493,0.253903,1.146212,8,8,8,1,1,1
8,england,bradley_terry_3,2024,5,0.595411,0.209399,0.994086,1,2,1,0,0,0
9,england,bradley_terry_4,2024,5,0.595523,0.209252,0.994172,2,1,2,0,0,0


## France

In [9]:
# France only, ordered by year and then by Brier-score rank.
(
    metrics.loc[metrics["championship"] == "france", cols]
    .sort_values(by=["year", "bs_rank"], ignore_index=True)
)

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,france,poisson_1,2023,5,0.640247,0.227319,1.064071,1,1,1,0,0,0
1,france,poisson_2,2023,5,0.653501,0.235665,1.081951,2,2,2,0,0,0
2,france,bradley_terry_3,2023,5,0.663875,0.239438,1.116976,3,3,5,0,0,1
3,france,naive,2023,5,0.666667,0.240078,1.098612,4,4,3,1,1,1
4,france,bradley_terry_4,2023,5,0.667266,0.240781,1.125314,5,6,6,1,1,1
5,france,poisson_3,2023,5,0.669363,0.240525,1.102004,6,5,4,1,1,1
6,france,poisson_4,2023,5,0.684476,0.248616,1.126949,7,7,7,1,1,1
7,france,poisson_5,2023,5,0.717873,0.266644,1.188923,8,8,8,1,1,1
8,france,poisson_2,2024,5,0.578007,0.215011,0.977628,1,1,1,0,0,0
9,france,bradley_terry_3,2024,5,0.582859,0.21748,0.982126,2,3,2,0,0,0


## Germany

In [10]:
# Germany only, ordered by year and then by Brier-score rank.
(
    metrics.loc[metrics["championship"] == "germany", cols]
    .sort_values(by=["year", "bs_rank"], ignore_index=True)
)

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,germany,poisson_1,2023,5,0.585212,0.196162,0.984302,1,1,1,0,0,0
1,germany,bradley_terry_4,2023,5,0.59621,0.198025,0.997075,2,2,2,0,0,0
2,germany,bradley_terry_3,2023,5,0.597168,0.198242,0.99903,3,3,3,0,0,0
3,germany,poisson_2,2023,5,0.607467,0.203178,1.008628,4,4,4,0,0,0
4,germany,poisson_4,2023,5,0.665047,0.224836,1.106429,5,5,6,0,0,1
5,germany,naive,2023,5,0.666667,0.230878,1.098612,6,6,5,1,1,1
6,germany,poisson_3,2023,5,0.681258,0.233323,1.1207,7,7,7,1,1,1
7,germany,poisson_5,2023,5,0.707801,0.243882,1.170084,8,8,8,1,1,1
8,germany,poisson_1,2024,5,0.627232,0.212131,1.04272,1,1,1,0,0,0
9,germany,bradley_terry_4,2024,5,0.627608,0.2125,1.042915,2,3,2,0,0,0


## Italy

In [11]:
# Italy only, ordered by year and then by Brier-score rank.
(
    metrics.loc[metrics["championship"] == "italy", cols]
    .sort_values(by=["year", "bs_rank"], ignore_index=True)
)

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,italy,poisson_2,2023,5,0.61085,0.197724,1.01802,1,1,1,0,0,0
1,italy,poisson_1,2023,5,0.612056,0.19917,1.019805,2,2,2,0,0,0
2,italy,bradley_terry_4,2023,5,0.617026,0.200635,1.022016,3,3,3,0,0,0
3,italy,bradley_terry_3,2023,5,0.618322,0.201165,1.023986,4,4,4,0,0,0
4,italy,poisson_4,2023,5,0.66473,0.220952,1.103594,5,5,6,0,0,1
5,italy,naive,2023,5,0.666667,0.226348,1.098612,6,6,5,1,1,1
6,italy,poisson_3,2023,5,0.680222,0.228629,1.118747,7,7,7,1,1,1
7,italy,poisson_5,2023,5,0.705905,0.242561,1.169616,8,8,8,1,1,1
8,italy,bradley_terry_3,2024,5,0.616156,0.207761,1.026393,1,2,1,0,0,0
9,italy,bradley_terry_4,2024,5,0.616225,0.207721,1.026451,2,1,2,0,0,0


## Netherlands

In [12]:
# Netherlands only, ordered by year and then by Brier-score rank.
(
    metrics.loc[metrics["championship"] == "netherlands", cols]
    .sort_values(by=["year", "bs_rank"], ignore_index=True)
)

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,netherlands,poisson_2,2023,5,0.555518,0.182842,0.939529,1,1,1,0,0,0
1,netherlands,poisson_1,2023,5,0.559152,0.184004,0.945024,2,2,2,0,0,0
2,netherlands,bradley_terry_4,2023,5,0.574422,0.189762,0.971537,3,3,3,0,0,0
3,netherlands,bradley_terry_3,2023,5,0.575555,0.190198,0.973545,4,4,4,0,0,0
4,netherlands,poisson_4,2023,5,0.646318,0.224224,1.073513,5,5,5,0,0,0
5,netherlands,poisson_3,2023,5,0.663965,0.232898,1.091735,6,6,6,0,0,0
6,netherlands,naive,2023,5,0.666667,0.233367,1.098612,7,7,7,1,1,1
7,netherlands,poisson_5,2023,5,0.671736,0.237895,1.108494,8,8,8,1,1,1
8,netherlands,poisson_2,2024,5,0.61202,0.206165,1.018922,1,1,1,0,0,0
9,netherlands,poisson_1,2024,5,0.619695,0.210646,1.03315,2,2,2,0,0,0


## Portugal

In [13]:
# Portugal only, ordered by year and then by Brier-score rank.
(
    metrics.loc[metrics["championship"] == "portugal", cols]
    .sort_values(by=["year", "bs_rank"], ignore_index=True)
)

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,portugal,bradley_terry_3,2023,5,0.58117,0.194929,0.978762,1,1,1,0,0,0
1,portugal,bradley_terry_4,2023,5,0.581196,0.195022,0.97878,2,2,2,0,0,0
2,portugal,poisson_2,2023,5,0.588084,0.20019,0.985421,3,3,3,0,0,0
3,portugal,poisson_1,2023,5,0.590407,0.201629,0.990059,4,4,4,0,0,0
4,portugal,poisson_4,2023,5,0.648021,0.224949,1.075314,5,5,5,0,0,0
5,portugal,poisson_3,2023,5,0.666619,0.233944,1.095445,6,7,6,0,1,0
6,portugal,naive,2023,5,0.666667,0.233917,1.098612,7,6,7,1,1,1
7,portugal,poisson_5,2023,5,0.697625,0.249318,1.148747,8,8,8,1,1,1
8,portugal,bradley_terry_4,2024,5,0.58761,0.197051,0.987602,1,2,1,0,0,0
9,portugal,bradley_terry_3,2024,5,0.58802,0.197024,0.988719,2,1,2,0,0,0


## Spain

In [14]:
# Spain only, ordered by year and then by Brier-score rank.
(
    metrics.loc[metrics["championship"] == "spain", cols]
    .sort_values(by=["year", "bs_rank"], ignore_index=True)
)

Unnamed: 0,championship,model_name,year,num_rounds,bs,rps,ls,bs_rank,rps_rank,ls_rank,bs_rank_gt_naive,rps_rank_gt_naive,ls_rank_gt_naive
0,spain,poisson_2,2023,5,0.593093,0.198861,0.993205,1,1,1,0,0,0
1,spain,poisson_1,2023,5,0.601776,0.202878,1.00456,2,4,2,0,0,0
2,spain,bradley_terry_3,2023,5,0.60565,0.202282,1.018074,3,2,3,0,0,0
3,spain,bradley_terry_4,2023,5,0.606635,0.202551,1.020711,4,3,4,0,0,0
4,spain,poisson_4,2023,5,0.643701,0.222028,1.070347,5,5,5,0,0,0
5,spain,poisson_3,2023,5,0.663221,0.231646,1.092066,6,6,6,0,0,0
6,spain,naive,2023,5,0.666667,0.232232,1.098612,7,7,7,1,1,1
7,spain,poisson_5,2023,5,0.702682,0.250115,1.162232,8,8,8,1,1,1
8,spain,poisson_2,2024,5,0.594461,0.205064,0.997053,1,1,1,0,0,0
9,spain,poisson_1,2024,5,0.600366,0.207036,1.005886,2,2,2,0,0,0


## Overall

In [15]:
# Per-model summary over every championship/year row: total of num_rounds and
# the unweighted mean of each score, ordered by the mean "bs" value.
(
    metrics.groupby("model_name")
    .agg(
        **{
            "num_rounds": ("num_rounds", "sum"),
            "bs": ("bs", "mean"),
            "rps": ("rps", "mean"),
            "ls": ("ls", "mean"),
        }
    )
    .reset_index()
    .sort_values(by="bs", ignore_index=True)
)

Unnamed: 0,model_name,num_rounds,bs,rps,ls
0,poisson_2,80,0.610633,0.210251,1.018975
1,poisson_1,80,0.610825,0.209864,1.019823
2,bradley_terry_3,80,0.612971,0.210693,1.024966
3,bradley_terry_4,80,0.613169,0.210774,1.025692
4,poisson_4,80,0.657366,0.229253,1.089806
5,naive,80,0.666667,0.234959,1.098612
6,poisson_3,80,0.670464,0.235598,1.102677
7,poisson_5,80,0.699338,0.250923,1.154573
