In [1]:
import numpy as np
import pandas as pd

In [2]:
def create_rank(df, metric_name):
    """Add per-(championship, year) rank columns for one metric to ``df``.

    Two columns are written onto ``df`` in place, and ``df`` is also returned:

    * ``{metric_name}_rank`` -- integer rank of ``metric_name`` inside each
      ``(championship, year)`` group. The smallest value gets rank 1 and ties
      share the lowest rank (``method="min"``). Missing values are ranked
      last, so the integer cast cannot fail on a NaN rank.
    * ``{metric_name}_rank_gt_naive`` -- running count of ``"naive_1"`` rows
      met while walking each group in rank order: 0 for rows placed before
      the ``naive_1`` row, and a positive count for the ``naive_1`` row and
      every row after it. Groups without a ``naive_1`` row get 0 throughout.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``championship``, ``year``, ``model_name``
        and ``metric_name``. The result of the second step is assigned back
        by index label, so the index is expected to be unique.
    metric_name : str
        Name of the column to rank; lower values rank better.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the two columns added.
    """
    group_keys = ["championship", "year"]
    rank_col = f"{metric_name}_rank"
    flag_col = f"{metric_name}_rank_gt_naive"

    # na_option="bottom": with the default ("keep") a NaN metric produces a
    # NaN rank and the cast to int below raises.
    df[rank_col] = (
        df.groupby(group_keys)[metric_name]
        .rank(method="min", ascending=True, na_option="bottom")
        .astype(int)
    )

    # Mark the naive_1 rows, then accumulate the marks in rank order so that
    # every row at or after naive_1 within its group carries a non-zero count.
    df[flag_col] = df["model_name"] == "naive_1"
    df[flag_col] = (
        df.sort_values(group_keys + [rank_col])
        .groupby(group_keys)[flag_col]
        .cumsum()
        .astype(int)
    )

    return df

In [3]:
def calculate_metrics(
    country: str,
    min_year: int = 2019,
    top_n: int = 16,
    results_dir: str = "../real_data/results",
):
    """Summarise the forecast scores of every model for one country.

    Reads ``{results_dir}/metrics_{country}.csv``, keeps rows whose ``year``
    is strictly greater than ``min_year``, averages each score per
    ``(championship, model_name, year)`` and then averages those yearly means
    per ``(championship, model_name)``, so every retained year carries the
    same weight.

    Parameters
    ----------
    country : str
        Suffix of the CSV file name (``metrics_{country}.csv``).
    min_year : int, default 2019
        Exclusive lower bound on ``year``.
    top_n : int, default 16
        Maximum number of rows returned, taken after sorting by ``bs``.
    results_dir : str, default "../real_data/results"
        Directory that holds the metrics CSV files.

    Returns
    -------
    pandas.DataFrame
        Columns ``championship``, ``model_name``, ``num_scenarios``, ``bs``,
        ``rps``, ``ls`` and ``interval_score``; at most ``top_n`` rows ordered
        by ascending ``bs`` with a fresh 0..n-1 index.
        ``num_scenarios`` is the number of distinct ``num_games`` values per
        year, summed over the years.
    """
    df = pd.read_csv(f"{results_dir}/metrics_{country}.csv")

    yearly = (
        df[df["year"] > min_year]
        .groupby(["championship", "model_name", "year"])
        .agg(
            num_scenarios=("num_games", "nunique"),
            bs=("brier_score", "mean"),
            rps=("ranked_probability_score", "mean"),
            ls=("log_score", "mean"),
            interval_score=("interval_score", "mean"),
        )
        .reset_index()
    )

    # A year with no interval score at all counts as infinitely bad instead of
    # being dropped from that model's average over the years.
    yearly["interval_score"] = yearly["interval_score"].fillna(np.inf)

    return (
        yearly.groupby(["championship", "model_name"])
        .agg(
            num_scenarios=("num_scenarios", "sum"),
            bs=("bs", "mean"),
            rps=("rps", "mean"),
            ls=("ls", "mean"),
            interval_score=("interval_score", "mean"),
        )
        .reset_index()
        .sort_values(by=["bs"], ascending=True, ignore_index=True)
        .head(top_n)
    )

In [4]:
# Per-model score summary read from metrics_brazil.csv, lowest mean Brier score first.
calculate_metrics("brazil")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,brazil,poisson_7,165,0.630014,0.219919,1.046596,1067.415188
1,brazil,poisson_2,165,0.630913,0.220278,1.047672,1066.430691
2,brazil,poisson_4,165,0.639401,0.224222,1.06214,1089.590852
3,brazil,poisson_9,165,0.64016,0.224683,1.06323,1092.98156
4,brazil,naive_2,165,0.641428,0.2259,1.062325,1081.356062
5,brazil,poisson_6,165,0.651096,0.229241,1.076061,1067.840198
6,brazil,poisson_1,165,0.651361,0.229295,1.07652,1068.206311
7,brazil,bradley_terry_3,165,0.652508,0.230284,1.081734,1079.76504
8,brazil,bradley_terry_4,165,0.653918,0.230705,1.084799,1081.968361
9,brazil,poisson_3,165,0.657054,0.232784,1.087509,1089.601354


In [5]:
# Per-model score summary read from metrics_england.csv, lowest mean Brier score first.
calculate_metrics("england")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,england,poisson_4,165,0.577871,0.206755,0.971716,1080.20865
1,england,poisson_9,165,0.581685,0.207474,0.978171,1079.784214
2,england,bradley_terry_3,165,0.582348,0.208773,0.97953,1060.640525
3,england,bradley_terry_4,165,0.58276,0.208829,0.980365,1061.364846
4,england,poisson_3,165,0.585899,0.21075,0.984327,1080.348767
5,england,poisson_8,165,0.590833,0.211333,0.992685,1078.638225
6,england,poisson_2,165,0.592952,0.212995,0.993752,1071.888315
7,england,poisson_7,165,0.595943,0.213,0.998083,1074.536097
8,england,poisson_1,165,0.602388,0.215043,1.007231,1071.371866
9,england,poisson_6,165,0.608222,0.217906,1.015491,1075.738058


In [6]:
# Per-model score summary read from metrics_france.csv, lowest mean Brier score first.
calculate_metrics("france")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,france,poisson_4,157,0.606583,0.217992,1.018575,1068.289548
1,france,poisson_3,157,0.608255,0.218816,1.020052,1067.634792
2,france,poisson_9,157,0.608685,0.218087,1.022482,1068.08213
3,france,poisson_2,157,0.609976,0.219631,1.020657,1060.044308
4,france,poisson_8,157,0.611526,0.218674,1.026266,1065.931369
5,france,bradley_terry_3,157,0.611946,0.220301,1.026733,1076.539557
6,france,poisson_7,157,0.612104,0.219449,1.022407,1060.652948
7,france,bradley_terry_4,157,0.612645,0.22052,1.028354,1077.4086
8,france,poisson_1,157,0.613542,0.219146,1.024561,1058.368289
9,france,poisson_6,157,0.617835,0.220985,1.030028,1061.337053


In [7]:
# Per-model score summary read from metrics_germany.csv, lowest mean Brier score first.
calculate_metrics("germany")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,germany,poisson_7,145,0.614336,0.212052,1.024122,1039.129719
1,germany,poisson_9,145,0.620257,0.214713,1.033997,1052.275007
2,germany,poisson_1,145,0.621211,0.215046,1.034968,1038.395262
3,germany,poisson_2,145,0.623363,0.216272,1.036169,1041.899563
4,germany,poisson_6,145,0.624187,0.216474,1.038676,1039.967397
5,germany,poisson_8,145,0.625871,0.21772,1.044107,1051.53801
6,germany,poisson_4,145,0.626782,0.216938,1.044016,1057.101058
7,germany,bradley_terry_3,145,0.63017,0.219176,1.049908,1049.94758
8,germany,bradley_terry_4,145,0.630319,0.21926,1.050312,1050.421301
9,germany,poisson_3,145,0.634184,0.221025,1.055937,1058.646681


In [8]:
# Per-model score summary read from metrics_italy.csv, lowest mean Brier score first.
calculate_metrics("italy")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,italy,poisson_1,165,0.606503,0.205108,1.012732,1062.399998
1,italy,poisson_8,165,0.60718,0.204498,1.015278,1064.894179
2,italy,poisson_9,165,0.608264,0.204305,1.016422,1066.138591
3,italy,poisson_4,165,0.60867,0.204212,1.017521,1065.444254
4,italy,poisson_3,165,0.609274,0.204963,1.01961,1065.575951
5,italy,bradley_terry_4,165,0.610638,0.205948,1.01905,1070.902391
6,italy,bradley_terry_3,165,0.610693,0.205958,1.019048,1070.400559
7,italy,poisson_7,165,0.611165,0.207055,1.018681,1067.097515
8,italy,poisson_6,165,0.612973,0.208058,1.021753,1069.559354
9,italy,poisson_2,165,0.614385,0.208557,1.022308,1064.736253


In [9]:
# Per-model score summary read from metrics_netherlands.csv, lowest mean Brier score first.
calculate_metrics("netherlands")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,netherlands,poisson_9,145,0.575958,0.199218,0.969666,1065.383711
1,netherlands,poisson_4,145,0.577243,0.19961,0.971619,1066.922418
2,netherlands,poisson_2,145,0.58307,0.201983,0.97989,1056.975085
3,netherlands,poisson_8,145,0.583986,0.202612,0.981986,1065.192364
4,netherlands,poisson_7,145,0.584945,0.202196,0.983387,1058.657065
5,netherlands,poisson_3,145,0.58624,0.203803,0.985494,1069.194292
6,netherlands,poisson_1,145,0.589293,0.202839,0.98977,1056.950829
7,netherlands,bradley_terry_4,145,0.591204,0.20604,0.99185,1070.253728
8,netherlands,bradley_terry_3,145,0.591261,0.206087,0.991865,1070.249873
9,netherlands,poisson_6,145,0.596152,0.20643,0.999521,1061.421492


In [10]:
# Per-model score summary read from metrics_portugal.csv, lowest mean Brier score first.
calculate_metrics("portugal")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,portugal,poisson_9,145,0.564199,0.191836,0.953298,1052.917291
1,portugal,poisson_4,145,0.564517,0.191735,0.953734,1050.910575
2,portugal,bradley_terry_3,145,0.568569,0.195297,0.960496,1053.415102
3,portugal,bradley_terry_4,145,0.568687,0.195354,0.960781,1054.234624
4,portugal,poisson_3,145,0.570477,0.19466,0.963623,1050.360015
5,portugal,poisson_8,145,0.571846,0.195146,0.965789,1051.946751
6,portugal,poisson_7,145,0.572114,0.196374,0.966132,1051.370368
7,portugal,poisson_2,145,0.572658,0.196622,0.966606,1050.732565
8,portugal,poisson_1,145,0.577488,0.198302,0.974221,1048.582312
9,portugal,poisson_6,145,0.582776,0.200646,0.981726,1053.121554


In [11]:
# Per-model score summary read from metrics_spain.csv, lowest mean Brier score first.
calculate_metrics("spain")

Unnamed: 0,championship,model_name,num_scenarios,bs,rps,ls,interval_score
0,spain,poisson_2,165,0.605886,0.209128,1.013028,1049.998972
1,spain,poisson_7,165,0.605971,0.209061,1.01323,1050.70232
2,spain,poisson_4,165,0.607474,0.209195,1.019601,1053.782902
3,spain,poisson_9,165,0.608171,0.209502,1.021067,1055.636717
4,spain,poisson_1,165,0.615526,0.213198,1.027449,1050.025152
5,spain,poisson_3,165,0.616282,0.213542,1.033825,1054.966207
6,spain,bradley_terry_3,165,0.616381,0.213097,1.03245,1060.820844
7,spain,bradley_terry_4,165,0.616744,0.213095,1.033881,1061.536646
8,spain,poisson_8,165,0.617007,0.213598,1.034783,1055.896599
9,spain,poisson_6,165,0.618622,0.214604,1.031526,1053.79014


In [12]:
# Every country that has a metrics file in this analysis.
countries = [
    "brazil", "england", "france", "germany",
    "italy", "netherlands", "portugal", "spain",
]

# Stack the per-country summaries into one long frame.
metrics = pd.concat(map(calculate_metrics, countries), ignore_index=True)

# Average each score over the countries in which a model appears in the
# per-country summary, then order by the mean Brier score (lowest first).
(
    metrics.groupby("model_name")
    .agg(**{score: (score, "mean") for score in ("bs", "rps", "ls", "interval_score")})
    .reset_index()
    .sort_values(by="bs", ascending=True, ignore_index=True)
)

Unnamed: 0,model_name,bs,rps,ls,interval_score
0,poisson_9,0.600922,0.208727,1.007292,1066.649903
1,poisson_4,0.601068,0.208833,1.007365,1066.531282
2,poisson_7,0.603324,0.209888,1.00908,1058.695153
3,poisson_2,0.60415,0.210683,1.01001,1057.838219
4,bradley_terry_3,0.607985,0.212372,1.01772,1065.222385
5,poisson_8,0.608246,0.212032,1.018763,1065.549249
6,bradley_terry_4,0.608364,0.212469,1.018674,1066.011312
7,poisson_3,0.608458,0.212543,1.018797,1067.041007
8,poisson_1,0.609664,0.212247,1.018432,1056.787502
9,poisson_6,0.613983,0.214293,1.024348,1060.346906


In [13]:
# Same cross-country average as above, restricted to four countries.
# Note: this rebinds the notebook-level names `countries` and `metrics`.
countries = [
    "brazil",
    "england",
    "italy",
    "spain",
]

# Stack the per-country summaries into one long frame.
metrics = pd.concat(map(calculate_metrics, countries), ignore_index=True)

# Mean of each score per model, lowest mean Brier score first.
(
    metrics.groupby("model_name")
    .agg(**{score: (score, "mean") for score in ("bs", "rps", "ls", "interval_score")})
    .reset_index()
    .sort_values(by="bs", ascending=True, ignore_index=True)
)

Unnamed: 0,model_name,bs,rps,ls,interval_score
0,poisson_4,0.608354,0.211096,1.017745,1072.256664
1,poisson_9,0.60957,0.211491,1.019722,1073.635271
2,poisson_7,0.610773,0.212259,1.019148,1064.93778
3,poisson_2,0.611034,0.212739,1.01919,1063.263558
4,bradley_terry_3,0.615483,0.214528,1.02819,1067.906742
5,bradley_terry_4,0.616015,0.214644,1.029524,1068.943061
6,poisson_3,0.617127,0.21551,1.031318,1072.62307
7,poisson_8,0.618185,0.215526,1.03299,1072.446374
8,poisson_1,0.618945,0.215661,1.030983,1063.000832
9,poisson_6,0.622728,0.217452,1.036208,1066.731938
