In [1]:
import numpy as np
import pandas as pd

In [2]:
def create_rank(df, metric_name):
    """Add per-(championship, year) rank columns for one metric to ``df``.

    Two integer columns are written onto ``df`` itself (the frame is modified
    in place and the same object is returned):

    * ``{metric_name}_rank`` -- rank of ``metric_name`` within each
      ``(championship, year)`` group, smallest value first (rank 1). Tied
      values share the lowest rank of the tie. Missing values are ranked
      after every present value instead of breaking the integer cast.
    * ``{metric_name}_rank_gt_naive`` -- running count, walking each group in
      rank order, of rows whose ``model_name`` equals ``"naive_1"``. With a
      single ``naive_1`` row per group this is 0 for rows placed ahead of
      ``naive_1`` and 1 from the ``naive_1`` row onwards.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``championship``, ``year``, ``model_name`` and the
        column named by ``metric_name``.
    metric_name : str
        Name of the column to rank; lower values rank better.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the two columns added.
    """
    group_keys = ["championship", "year"]
    rank_col = f"{metric_name}_rank"
    flag_col = f"{metric_name}_rank_gt_naive"

    # na_option="bottom": the default ("keep") leaves NaN ranks, which makes
    # the astype(int) below raise for any group containing a missing metric.
    df[rank_col] = (
        df.groupby(group_keys)[metric_name]
        .rank(method="min", ascending=True, na_option="bottom")
        .astype(int)
    )

    # Mark the naive_1 rows, then accumulate the marks in rank order inside
    # each group. Assigning the result back aligns on the original index, so
    # the temporary sort does not reorder ``df``.
    df[flag_col] = df["model_name"] == "naive_1"
    df[flag_col] = (
        df.sort_values(group_keys + [rank_col])
        .groupby(group_keys)[flag_col]
        .cumsum()
        .astype(int)
    )

    return df

In [3]:
def calculate_metrics(
    country: str,
    after_year: int = 2019,
    top_n: int = 16,
    results_dir: str = "../real_data/results",
) -> pd.DataFrame:
    """Summarise the per-model forecast scores stored for one country.

    Reads ``{results_dir}/metrics_{country}.csv``, keeps the rows whose
    ``year`` is strictly greater than ``after_year``, averages every score per
    ``(championship, model_name, year)``, and then averages those yearly
    means per ``(championship, model_name)``.

    Parameters
    ----------
    country : str
        Suffix of the CSV file name (``metrics_{country}.csv``).
    after_year : int, default 2019
        Exclusive lower bound on ``year``.
    top_n : int, default 16
        Maximum number of rows returned.
    results_dir : str, default "../real_data/results"
        Directory that holds the metrics CSV files.

    Returns
    -------
    pandas.DataFrame
        One row per ``(championship, model_name)`` with columns
        ``num_scenarios`` (sum over years of the yearly count of distinct
        ``num_games`` values), ``bs``, ``rps``, ``ls`` and ``interval_score``
        (means of the yearly means), sorted by ascending ``bs`` and truncated
        to ``top_n`` rows.
    """
    df = pd.read_csv(f"{results_dir}/metrics_{country}.csv")

    # Stage 1: one row per championship / model / year.
    metrics = (
        df[df["year"] > after_year]
        .groupby(["championship", "model_name", "year"])
        .agg(
            num_scenarios=("num_games", "nunique"),
            bs=("brier_score", "mean"),
            rps=("ranked_probability_score", "mean"),
            ls=("log_score", "mean"),
            interval_score=("interval_score", "mean"),
        )
        .reset_index()
    )

    # A yearly interval score that is missing becomes +inf before ranking.
    # NOTE(review): the same +inf also flows into the final mean below, so a
    # model with one missing year ends up with an infinite interval_score --
    # confirm this is the intended penalty.
    metrics["interval_score"] = metrics["interval_score"].fillna(np.inf)

    metric_names = ["bs", "rps", "ls", "interval_score"]
    for metric_name in metric_names:
        metrics = create_rank(metrics, metric_name)
    metrics = metrics.sort_values(by=["bs_rank", "year"], ignore_index=True)

    # NOTE(review): the rank columns are selected here, but the aggregation
    # below does not include them, so they never reach the returned frame.
    cols = (
        ["championship", "model_name", "year", "num_scenarios"]
        + metric_names
        + [f"{metric_name}_rank" for metric_name in metric_names]
        + [f"{metric_name}_rank_gt_naive" for metric_name in metric_names]
    )

    # Stage 2: collapse the years into one row per championship / model.
    return (
        metrics[cols]
        .groupby(["championship", "model_name"])
        .agg(
            num_scenarios=("num_scenarios", "sum"),
            bs=("bs", "mean"),
            rps=("rps", "mean"),
            ls=("ls", "mean"),
            interval_score=("interval_score", "mean"),
        )
        .reset_index()
        .sort_values(by=["bs"], ascending=True, ignore_index=True)
        .head(top_n)
    )

In [4]:
# Per-model mean scores from metrics_brazil.csv, lowest mean Brier score first.
calculate_metrics("brazil")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,brazil,poisson_2,25,0.638225,0.22037,1.058391,1116.348638
1,brazil,naive_2,25,0.645321,0.224042,1.068072,1129.38259
2,brazil,poisson_4,25,0.651555,0.226129,1.080823,1157.060261
3,brazil,poisson_1,25,0.655193,0.228113,1.08241,1117.689998
4,brazil,bradley_terry_3,25,0.659401,0.230612,1.091159,1134.268391
5,brazil,bradley_terry_4,25,0.660396,0.230975,1.093454,1137.337482
6,brazil,naive_1,25,0.666667,0.232791,1.098612,1135.151224
7,brazil,poisson_3,25,0.667383,0.233754,1.103942,1156.852752
8,brazil,poisson_5,25,0.681264,0.239936,1.124512,1170.016298


In [5]:
# Per-model mean scores from metrics_england.csv, lowest mean Brier score first.
calculate_metrics("england")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,england,bradley_terry_3,25,0.595755,0.211595,0.998475,1100.803306
1,england,bradley_terry_4,25,0.596295,0.211709,0.999394,1102.02548
2,england,poisson_4,25,0.599415,0.213278,1.003606,1136.737213
3,england,poisson_2,25,0.603908,0.215221,1.009622,1116.691904
4,england,poisson_3,25,0.60527,0.216225,1.013463,1135.858224
5,england,poisson_1,25,0.610117,0.216328,1.018697,1116.099331
6,england,naive_1,25,0.666667,0.241302,1.098612,1224.478104
7,england,naive_2,25,0.67045,0.241357,1.109233,1233.159941
8,england,poisson_5,25,0.68388,0.252462,1.130781,1262.545885


In [6]:
# Per-model mean scores from metrics_france.csv, lowest mean Brier score first.
calculate_metrics("france")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,france,poisson_2,25,0.617294,0.221341,1.030665,1105.702311
1,france,poisson_1,25,0.617732,0.219622,1.030552,1103.001789
2,france,poisson_3,25,0.619338,0.221543,1.037357,1117.913782
3,france,poisson_4,25,0.620169,0.222006,1.039406,1120.239491
4,france,bradley_terry_3,25,0.624068,0.223221,1.044801,1128.990197
5,france,bradley_terry_4,25,0.625006,0.223506,1.047223,1130.120789
6,france,naive_2,25,0.663832,0.241445,1.096857,1184.277553
7,france,naive_1,25,0.666667,0.240266,1.098612,1184.37525
8,france,poisson_5,25,0.683157,0.250665,1.131221,1192.868291


In [7]:
# Per-model mean scores from metrics_germany.csv, lowest mean Brier score first.
calculate_metrics("germany")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,germany,poisson_1,25,0.617287,0.213668,1.029463,1063.294535
1,germany,poisson_2,25,0.623791,0.216375,1.036022,1069.143135
2,germany,bradley_terry_4,25,0.627437,0.217299,1.04509,1076.501618
3,germany,bradley_terry_3,25,0.627457,0.217331,1.045172,1075.448958
4,germany,poisson_4,25,0.630912,0.218079,1.051232,1088.675387
5,germany,poisson_3,25,0.634569,0.220577,1.057947,1089.977297
6,germany,naive_1,25,0.666667,0.235031,1.098612,1152.697116
7,germany,naive_2,25,0.667553,0.235986,1.102315,1158.080907
8,germany,poisson_5,25,0.70214,0.25116,1.158639,1191.156054


In [8]:
# Per-model mean scores from metrics_italy.csv, lowest mean Brier score first.
calculate_metrics("italy")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,italy,poisson_1,25,0.607143,0.205297,1.013972,1114.716666
1,italy,poisson_4,25,0.609217,0.204622,1.019298,1124.076254
2,italy,poisson_2,25,0.610504,0.206706,1.017855,1118.081836
3,italy,poisson_3,25,0.612122,0.206384,1.023879,1124.219116
4,italy,bradley_terry_4,25,0.613558,0.206959,1.023011,1131.124738
5,italy,bradley_terry_3,25,0.613635,0.207034,1.023093,1130.791194
6,italy,naive_1,25,0.666667,0.232224,1.098612,1245.569859
7,italy,naive_2,25,0.668935,0.232444,1.103442,1246.810718
8,italy,poisson_5,25,0.692601,0.243412,1.145405,1277.554941


In [9]:
# Per-model mean scores from metrics_netherlands.csv, lowest mean Brier score first.
calculate_metrics("netherlands")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,netherlands,poisson_4,25,0.580503,0.199295,0.978085,1102.581215
1,netherlands,poisson_2,25,0.582803,0.200159,0.979667,1089.573437
2,netherlands,poisson_3,25,0.589288,0.203442,0.991817,1105.392037
3,netherlands,poisson_1,25,0.589434,0.201792,0.990078,1089.27826
4,netherlands,bradley_terry_4,25,0.597649,0.207138,1.002604,1115.705023
5,netherlands,bradley_terry_3,25,0.598007,0.207272,1.003306,1115.837021
6,netherlands,naive_2,25,0.666359,0.237196,1.103444,1222.800801
7,netherlands,naive_1,25,0.666667,0.237655,1.098612,1214.773459
8,netherlands,poisson_5,25,0.681393,0.247169,1.128368,1260.323918


In [10]:
# Per-model mean scores from metrics_portugal.csv, lowest mean Brier score first.
calculate_metrics("portugal")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,portugal,poisson_4,25,0.580164,0.197366,0.976967,1099.837489
1,portugal,bradley_terry_3,25,0.580336,0.199065,0.977234,1096.990302
2,portugal,bradley_terry_4,25,0.580366,0.199111,0.977277,1098.538622
3,portugal,poisson_2,25,0.581323,0.199247,0.978446,1095.66525
4,portugal,poisson_1,25,0.584438,0.200496,0.983556,1093.808653
5,portugal,poisson_3,25,0.584873,0.199793,0.984728,1099.433682
6,portugal,naive_1,25,0.666667,0.23699,1.098612,1224.308903
7,portugal,naive_2,25,0.667299,0.236923,1.104589,1234.69659
8,portugal,poisson_5,25,0.707881,0.256104,1.168111,1258.835177


In [11]:
# Per-model mean scores from metrics_spain.csv, lowest mean Brier score first.
calculate_metrics("spain")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,spain,poisson_2,25,0.604478,0.207332,1.010976,1089.657852
1,spain,poisson_4,25,0.610811,0.209058,1.023909,1097.383907
2,spain,poisson_1,25,0.615364,0.212125,1.026914,1089.844407
3,spain,poisson_3,25,0.62079,0.213923,1.03946,1098.431447
4,spain,bradley_terry_4,25,0.621421,0.213937,1.039926,1107.469374
5,spain,bradley_terry_3,25,0.621482,0.214099,1.039195,1107.481602
6,spain,naive_2,25,0.653133,0.228998,1.080552,1204.152177
7,spain,naive_1,25,0.666667,0.234575,1.098612,1207.125794
8,spain,poisson_5,25,0.686129,0.243723,1.134186,1235.817498
