In [1]:
import numpy as np
import pandas as pd

In [2]:
def create_rank(df, metric_name):
    """Rank models on one metric within each (championship, year) group.

    Two columns are added to a copy of ``df``:

    * ``{metric_name}_rank`` -- integer rank of ``metric_name`` inside the
      group, smallest value ranked 1. Ties share the lowest rank
      (``method="min"``). Missing metric values are ranked after every
      present value instead of raising on the integer cast.
    * ``{metric_name}_rank_gt_naive`` -- number of ``"naive_1"`` rows in the
      same group whose rank is less than or equal to the row's rank. With a
      single ``"naive_1"`` row per group this is 1 for ``"naive_1"`` itself
      and for every model ranked level with or after it, and 0 for models
      ranked before it (or when the group has no ``"naive_1"`` row).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``championship``, ``year``, ``model_name``
        and ``metric_name``. It is not modified.
    metric_name : str
        Name of the column to rank.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the two columns described above.
    """
    group_keys = ["championship", "year"]
    rank_col = f"{metric_name}_rank"
    flag_col = f"{metric_name}_rank_gt_naive"

    # Work on a copy so the caller's frame does not gain columns as a side effect.
    ranked = df.copy()

    ranked[rank_col] = (
        ranked.groupby(group_keys)[metric_name]
        # na_option="bottom" keeps the cast to int safe when a metric is NaN.
        .rank(method="min", ascending=True, na_option="bottom")
        .astype(int)
    )

    is_naive = ranked["model_name"] == "naive_1"

    # Within a tied rank, place "naive_1" first. Without this extra sort key a
    # model tied with "naive_1" would be flagged 0 or 1 depending only on the
    # order of the input rows.
    ordered = (
        ranked[group_keys + [rank_col]]
        .assign(_not_naive=~is_naive, _naive_hits=is_naive.astype(int))
        .sort_values(group_keys + [rank_col, "_not_naive"])
    )

    # The cumulative sum is computed in rank order and re-aligned to the
    # original rows through the index on assignment.
    ranked[flag_col] = (
        ordered.groupby(group_keys)["_naive_hits"].cumsum().astype(int)
    )

    return ranked

In [3]:
def calculate_metrics(
    country: str,
    first_year: int = 2020,
    last_year: int = 2024,
    top_n: int = 16,
    results_dir: str = "../real_data/results",
):
    """Summarise per-model scores for one country's results file.

    Reads ``{results_dir}/metrics_{country}.csv``, keeps the rows whose
    ``year`` lies in ``[first_year, last_year]``, averages every score per
    (championship, model, year), and then averages those yearly values per
    (championship, model).

    Parameters
    ----------
    country : str
        Suffix of the results file to read (``metrics_{country}.csv``).
    first_year, last_year : int
        Inclusive bounds of the years kept. The defaults select the same
        rows as ``year > 2019`` and ``year < 2025`` for whole-number years.
    top_n : int
        Maximum number of rows returned, counted from the lowest mean
        Brier score.
    results_dir : str
        Directory that holds the per-country CSV files.

    Returns
    -------
    pandas.DataFrame
        One row per (championship, model_name) with the columns
        ``championship``, ``model_name``, ``num_scenarios``, ``bs``, ``rps``,
        ``ls`` and ``interval_score``, sorted by ``bs`` ascending and cut to
        ``top_n`` rows. ``num_scenarios`` is the sum over years of the number
        of distinct ``num_games`` values seen in that year.
    """
    raw = pd.read_csv(f"{results_dir}/metrics_{country}.csv")
    in_window = raw["year"].between(first_year, last_year)

    per_year = (
        raw[in_window]
        .groupby(["championship", "model_name", "year"])
        .agg(
            num_scenarios=("num_games", "nunique"),
            bs=("brier_score", "mean"),
            rps=("ranked_probability_score", "mean"),
            ls=("log_score", "mean"),
            interval_score=("interval_score", "mean"),
        )
        .reset_index()
    )

    # A year with no interval score is set to +inf rather than left missing,
    # so it is not silently skipped when the years are averaged below.
    per_year["interval_score"] = per_year["interval_score"].fillna(np.inf)

    return (
        per_year.groupby(["championship", "model_name"])
        .agg(
            num_scenarios=("num_scenarios", "sum"),
            bs=("bs", "mean"),
            rps=("rps", "mean"),
            ls=("ls", "mean"),
            interval_score=("interval_score", "mean"),
        )
        .reset_index()
        .sort_values(by=["bs"], ascending=True, ignore_index=True)
        .head(top_n)
    )

In [4]:
# Brazil: per-model scores averaged over years after 2019 and before 2025,
# ordered by mean Brier score (lowest first).
calculate_metrics("brazil")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,brazil,poisson_7,165,0.629988,0.219941,1.046529,1067.403036
1,brazil,poisson_2,165,0.630938,0.220266,1.047717,1066.39417
2,brazil,poisson_9,165,0.640302,0.224713,1.063482,1092.735023
3,brazil,naive_2,165,0.641428,0.2259,1.062325,1081.318767
4,brazil,poisson_4,165,0.641975,0.225534,1.065183,1087.455522
5,brazil,poisson_6,165,0.651189,0.229269,1.076192,1067.859817
6,brazil,poisson_1,165,0.651432,0.22933,1.076613,1068.256577
7,brazil,bradley_terry_3,165,0.652691,0.230368,1.082013,1079.90517
8,brazil,bradley_terry_4,165,0.653994,0.230701,1.084993,1081.849862
9,brazil,poisson_8,165,0.657858,0.232747,1.089205,1090.143087


In [5]:
# England: per-model scores averaged over years after 2019 and before 2025,
# ordered by mean Brier score (lowest first).
calculate_metrics("england")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,england,poisson_4,165,0.580985,0.207956,0.977097,1081.374985
1,england,poisson_9,165,0.581328,0.20739,0.977533,1079.190924
2,england,bradley_terry_3,165,0.582293,0.208747,0.979499,1060.697369
3,england,bradley_terry_4,165,0.582844,0.208881,0.980502,1061.359012
4,england,poisson_8,165,0.59038,0.211293,0.991805,1078.047763
5,england,poisson_3,165,0.590895,0.211876,0.994316,1087.945257
6,england,poisson_2,165,0.593173,0.213082,0.994093,1071.953141
7,england,poisson_7,165,0.596116,0.213067,0.998369,1074.527106
8,england,poisson_1,165,0.602239,0.214991,1.006988,1071.200174
9,england,poisson_6,165,0.608417,0.217956,1.015774,1075.777801


In [6]:
# France: per-model scores averaged over years after 2019 and before 2025,
# ordered by mean Brier score (lowest first).
calculate_metrics("france")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,france,poisson_4,157,0.607581,0.218344,1.022699,1071.107623
1,france,poisson_9,157,0.608431,0.218109,1.02195,1068.221967
2,france,poisson_2,157,0.609893,0.219615,1.02049,1059.856468
3,france,poisson_8,157,0.610893,0.218568,1.024993,1066.026095
4,france,bradley_terry_3,157,0.612091,0.220351,1.026959,1076.371348
5,france,poisson_7,157,0.61232,0.219522,1.022771,1060.772998
6,france,bradley_terry_4,157,0.612641,0.22051,1.0284,1077.38058
7,france,poisson_1,157,0.613624,0.219196,1.024737,1058.429374
8,france,poisson_3,157,0.614537,0.220535,1.033821,1074.59444
9,france,poisson_6,157,0.617716,0.220935,1.029859,1061.303801


In [7]:
# Germany: per-model scores averaged over years after 2019 and before 2025,
# ordered by mean Brier score (lowest first).
calculate_metrics("germany")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,germany,poisson_7,145,0.614241,0.211977,1.023961,1039.081654
1,germany,poisson_9,145,0.62042,0.214732,1.034242,1052.297147
2,germany,poisson_1,145,0.621192,0.215051,1.034902,1038.365385
3,germany,poisson_2,145,0.623416,0.216284,1.036264,1041.849961
4,germany,poisson_6,145,0.624158,0.216483,1.03866,1039.968708
5,germany,poisson_8,145,0.625894,0.217753,1.04405,1051.541699
6,germany,bradley_terry_4,145,0.630266,0.219209,1.050238,1050.446702
7,germany,bradley_terry_3,145,0.630396,0.219243,1.050247,1049.817225
8,germany,poisson_4,145,0.632866,0.219796,1.055187,1062.307274
9,germany,poisson_3,145,0.639292,0.223658,1.069575,1071.882594


In [8]:
# Italy: per-model scores averaged over years after 2019 and before 2025,
# ordered by mean Brier score (lowest first).
calculate_metrics("italy")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,italy,poisson_1,165,0.606514,0.205109,1.012719,1062.549318
1,italy,poisson_8,165,0.607288,0.204505,1.015409,1064.634503
2,italy,poisson_9,165,0.608583,0.204335,1.017094,1065.923539
3,italy,poisson_3,165,0.608936,0.205183,1.020044,1070.156704
4,italy,bradley_terry_3,165,0.610591,0.205915,1.018851,1070.539565
5,italy,bradley_terry_4,165,0.610684,0.205948,1.01916,1070.916846
6,italy,poisson_7,165,0.611175,0.207045,1.018724,1067.196427
7,italy,poisson_4,165,0.612138,0.205692,1.021739,1066.802646
8,italy,poisson_6,165,0.613037,0.208059,1.021872,1069.561485
9,italy,poisson_2,165,0.614347,0.20854,1.022282,1064.765479


In [9]:
# Netherlands: per-model scores averaged over years after 2019 and before 2025,
# ordered by mean Brier score (lowest first).
calculate_metrics("netherlands")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,netherlands,poisson_9,145,0.576166,0.199288,0.970061,1065.657781
1,netherlands,poisson_4,145,0.578001,0.200102,0.972988,1069.105169
2,netherlands,poisson_2,145,0.583046,0.202,0.979856,1057.122158
3,netherlands,poisson_8,145,0.584138,0.202725,0.982048,1065.482335
4,netherlands,poisson_7,145,0.584968,0.202196,0.983391,1058.634815
5,netherlands,poisson_1,145,0.589521,0.202974,0.990021,1057.0356
6,netherlands,poisson_3,145,0.589684,0.205283,0.99415,1077.528199
7,netherlands,bradley_terry_4,145,0.591299,0.206083,0.991968,1070.367576
8,netherlands,bradley_terry_3,145,0.591375,0.206142,0.992098,1070.206854
9,netherlands,poisson_6,145,0.596073,0.206414,0.99942,1061.451295


In [10]:
# Portugal: per-model scores averaged over years after 2019 and before 2025,
# ordered by mean Brier score (lowest first).
calculate_metrics("portugal")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,portugal,poisson_9,145,0.564162,0.191818,0.953226,1052.710055
1,portugal,poisson_4,145,0.565183,0.192261,0.954499,1050.529399
2,portugal,bradley_terry_4,145,0.568706,0.195385,0.960782,1054.112741
3,portugal,bradley_terry_3,145,0.568811,0.195376,0.960894,1053.489647
4,portugal,poisson_3,145,0.570585,0.194673,0.963493,1050.999403
5,portugal,poisson_8,145,0.571456,0.195041,0.965244,1051.702094
6,portugal,poisson_7,145,0.5722,0.196412,0.966263,1051.396967
7,portugal,poisson_2,145,0.572805,0.19667,0.966811,1050.797042
8,portugal,poisson_1,145,0.577364,0.198238,0.974059,1048.58221
9,portugal,poisson_6,145,0.582978,0.200705,0.982042,1053.086354


In [11]:
# Spain: per-model scores averaged over years after 2019 and before 2025,
# ordered by mean Brier score (lowest first).
calculate_metrics("spain")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,spain,poisson_7,165,0.605713,0.208986,1.012816,1050.519242
1,spain,poisson_2,165,0.605929,0.209143,1.013095,1050.026392
2,spain,poisson_9,165,0.608528,0.209609,1.021575,1055.776919
3,spain,poisson_4,165,0.608752,0.209867,1.021484,1053.550377
4,spain,poisson_1,165,0.615382,0.213133,1.027234,1049.918794
5,spain,bradley_terry_3,165,0.616347,0.213098,1.032363,1060.850656
6,spain,bradley_terry_4,165,0.616599,0.213062,1.033631,1061.343861
7,spain,poisson_8,165,0.616885,0.213585,1.034527,1055.658295
8,spain,poisson_3,165,0.617588,0.213937,1.036897,1056.650686
9,spain,poisson_6,165,0.618703,0.214621,1.031632,1053.721141


In [12]:
# Leagues pooled in the cross-country comparison below.
countries = [
    "brazil",
    "england",
    "france",
    "germany",
    "italy",
    "netherlands",
    "portugal",
    "spain",
]

# One summary frame per league, stacked into a single table.
metrics = pd.concat(list(map(calculate_metrics, countries)), ignore_index=True)

# Unweighted mean of each league-level score per model, best Brier score first.
# A model missing from a league's returned rows (calculate_metrics caps the
# row count) is averaged over the remaining leagues only.
(
    metrics.groupby("model_name")[["bs", "rps", "ls", "interval_score"]]
    .mean()
    .reset_index()
    .sort_values(by="bs", ascending=True, ignore_index=True)
)

Unnamed: 0,model_name,bs,rps,ls,interval_score
0,poisson_9,0.60099,0.208749,1.007396,1066.564169
1,poisson_7,0.60334,0.209893,1.009103,1058.69153
2,poisson_4,0.603435,0.209944,1.011359,1067.779125
3,poisson_2,0.604193,0.2107,1.010076,1057.845601
4,bradley_terry_3,0.608074,0.212405,1.017866,1065.234729
5,poisson_8,0.608099,0.212027,1.01841,1065.404484
6,bradley_terry_4,0.608379,0.212472,1.018709,1065.972148
7,poisson_1,0.609658,0.212253,1.018409,1056.792179
8,poisson_3,0.611295,0.21355,1.025423,1072.618743
9,poisson_6,0.614034,0.214305,1.024431,1060.3413


In [13]:
# Pooled comparison restricted to four leagues.
# NOTE(review): the reason for choosing this subset is not stated in the notebook.
countries = ["brazil", "england", "italy", "spain"]

metrics = pd.concat(list(map(calculate_metrics, countries)), ignore_index=True)

# Unweighted mean of each league-level score per model, best Brier score first.
score_means = metrics.groupby("model_name")[
    ["bs", "rps", "ls", "interval_score"]
].mean()
score_means.reset_index().sort_values(by="bs", ascending=True, ignore_index=True)

Unnamed: 0,model_name,bs,rps,ls,interval_score
0,poisson_9,0.609685,0.211512,1.019921,1073.406601
1,poisson_7,0.610748,0.21226,1.01911,1064.911453
2,poisson_4,0.610962,0.212262,1.021376,1072.295883
3,poisson_2,0.611097,0.212758,1.019297,1063.284796
4,bradley_terry_3,0.615481,0.214532,1.028182,1067.99819
5,bradley_terry_4,0.61603,0.214648,1.029571,1068.867395
6,poisson_8,0.618103,0.215532,1.032737,1072.120912
7,poisson_1,0.618892,0.215641,1.030889,1062.981216
8,poisson_3,0.619066,0.216063,1.035587,1076.486327
9,poisson_6,0.622837,0.217476,1.036367,1066.730061
