# Dixon Coles Model, no time dependence

The time dependence of the full Dixon-Coles model is hard to implement, so this version leaves it out and keeps only the tau (low-score correction) term.

In [235]:
from modeling import *
from plotting import *
from predictions import predict_outcomes_dixon_coles, predict_outcomes_dixon_coles_et
from data import get_data, cut_data
import arviz as az
from arviz import hdi
import pickle
import seaborn as sns
import matplotlib.patheffects as PathEffects
import matplotlib.pyplot as plt
from scipy.stats import poisson
import pandas as pd
from itertools import product
from pystan import check_hmc_diagnostics
#from pystan.experimental import unpickle_fit
from collections import defaultdict

In [3]:
import multiprocessing

# Use the "fork" start method for worker processes (presumably needed for the
# multi-chain PyStan sampling below — confirm). "fork" exists on POSIX only.
# force=True keeps this cell re-runnable: without it, a second call in the same
# kernel raises RuntimeError("context has already been set").
multiprocessing.set_start_method("fork", force=True)

In [69]:
# Preview the first rows of the match data.
# NOTE(review): `data` appears to be assigned only further down
# (data = cut_data(get_data(), '2018-01-01')), so on Restart & Run All this cell
# would raise NameError — move it below that cell.
data.head()

Unnamed: 0_level_0,home_team,away_team,home_score,away_score,country,neutral
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,Iraq,United Arab Emirates,0.0,0.0,Kuwait,True
2018-01-02,Oman,Bahrain,1.0,0.0,Kuwait,True
2018-01-05,Oman,United Arab Emirates,0.0,0.0,Kuwait,True
2018-01-07,Estonia,Sweden,1.0,1.0,United Arab Emirates,True
2018-01-11,Denmark,Sweden,0.0,1.0,United Arab Emirates,True


In [4]:
# Build the Stan model from stan/dixon_coles_no_td.stan; "-w" is handed on as an
# extra compiler argument (presumably to silence C++ warnings — confirm).
# NOTE(review): force=True looks like it forces a recompile on every run, which
# makes Restart & Run All slow — confirm against compile_or_load.
model = compile_or_load('stan/dixon_coles_no_td.stan', extra_compile_args=["-w"], force=True)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_1bcda930bbc60ce53d8b16331954b2e2 NOW.


In [43]:
# Load the match history and turn it into the inputs the Stan model expects.
# Presumably cut_data keeps only matches from 2018-01-01 onwards (the preview of
# `data` starts on 2018-01-02) — confirm against data.cut_data.
data = cut_data(get_data(), '2018-01-01')
stan_data = prepare_stan_data(data)
# `countries` is used below as countries.index(team) to find a team's column in
# chains["attack"] / chains["defense"], so its order has to match the Stan team
# indexing — TODO confirm in prepare_stan_data.
countries = get_country_list(data)

In [44]:
# Fixed RNG seed so that Restart & Run All reproduces the same posterior draws
# (and therefore the same predicted scorelines) instead of a new random run.
# Assumes `model` is a PyStan 2 StanModel, whose sampling() accepts `seed`.
SAMPLING_SEED = 20240614
fit = model.sampling(stan_data, iter=3000, seed=SAMPLING_SEED) #, control={'max_treedepth': 12})


Gradient evaluation took 0.003544 seconds
1000 transitions using 10 leapfrog steps per transition would take 35.44 seconds.
Adjust your expectations accordingly!



Gradient evaluation took 0.004066 seconds
1000 transitions using 10 leapfrog steps per transition would take 40.66 seconds.
Adjust your expectations accordingly!



Gradient evaluation took 0.003823 seconds
1000 transitions using 10 leapfrog steps per transition would take 38.23 seconds.
Adjust your expectations accordingly!



Gradient evaluation took 0.00381 seconds
1000 transitions using 10 leapfrog steps per transition would take 38.1 seconds.
Adjust your expectations accordingly!


Iteration:    1 / 3000 [  0%]  (Warmup)
Iteration:    1 / 3000 [  0%]  (Warmup)
Iteration:    1 / 3000 [  0%]  (Warmup)
Iteration:    1 / 3000 [  0%]  (Warmup)
Iteration:  300 / 3000 [ 10%]  (Warmup)
Iteration:  300 / 3000 [ 10%]  (Warmup)
Iteration:  300 / 3000 [ 10%]  (Warmup)
Iteration:  300 / 3000 [ 10%]  (Warmup)
Iteration:  600 / 3000

To run all diagnostics call pystan.check_hmc_diagnostics(fit)


In [45]:
# Pull the posterior draws out of the fit. Below they are looked up by parameter
# name, e.g. chains["attack"][:, team_idx] or chains["rho"].
chains = fit.extract()

In [46]:
# Run PyStan's HMC diagnostics; the result is a dict with one boolean per check
# (n_eff, Rhat, divergence, treedepth, energy).
check_hmc_diagnostics(fit)

{'n_eff': True,
 'Rhat': True,
 'divergence': True,
 'treedepth': True,
 'energy': True}

In [48]:
# Posterior draws of the parameters that are not team-specific; they are passed
# unchanged to predict_outcomes_dixon_coles for every fixture below.
home_advantage, intercept, rho = (
    chains[name] for name in ('home_advantage', 'intercept', 'rho')
)

In [70]:
# Fixtures of the first group-stage round; the prediction loop below reads the
# home_team, away_team and location columns of each row.
day1_matches = pd.read_csv("matches/EURO_2024/pre_day_1.csv")

In [71]:
# Print the single most probable scoreline for every fixture of round 1.
for _, match in day1_matches.iterrows():
    home_team, away_team = match.home_team, match.away_team
    # Position of each team in `countries` selects its column of posterior draws.
    home_idx, away_idx = countries.index(home_team), countries.index(away_team)
    A = predict_outcomes_dixon_coles(
        attack_home=chains["attack"][:, home_idx],
        attack_away=chains["attack"][:, away_idx],
        defense_home=chains["defense"][:, home_idx],
        defense_away=chains["defense"][:, away_idx],
        intercept=intercept,
        home_advantage=home_advantage,
        rho=rho,
        home_team=home_team,
        away_team=away_team,
        location=match.location,
    )
    # A is read as a 2-D grid indexed [home_goals, away_goals]; its largest
    # entry is the predicted result.
    best_cell = A.argmax()
    home_goals, away_goals = np.unravel_index(best_cell, A.shape)
    print(home_team, away_team, home_goals, away_goals)

Germany Scotland 2 0
Hungary Switzerland 0 1
Spain Croatia 1 0
Italy Albania 1 0
Poland Netherlands 0 1
Slovenia Denmark 0 1
Serbia England 0 1
Romania Ukraine 0 1
Belgium Slovakia 2 0
Austria France 0 1
Turkey Georgia 1 1
Portugal Czech Republic 1 0


In [72]:
# Fixtures of the second group-stage round; the prediction loop below reads the
# home_team, away_team and location columns of each row.
day2_matches = pd.read_csv("matches/EURO_2024/pre_day_2.csv")

In [73]:
# Print the single most probable scoreline for every fixture of round 2.
for _, match in day2_matches.iterrows():
    home_team, away_team = match.home_team, match.away_team
    # Position of each team in `countries` selects its column of posterior draws.
    home_idx, away_idx = countries.index(home_team), countries.index(away_team)
    A = predict_outcomes_dixon_coles(
        attack_home=chains["attack"][:, home_idx],
        attack_away=chains["attack"][:, away_idx],
        defense_home=chains["defense"][:, home_idx],
        defense_away=chains["defense"][:, away_idx],
        intercept=intercept,
        home_advantage=home_advantage,
        rho=rho,
        home_team=home_team,
        away_team=away_team,
        location=match.location,
    )
    # A is read as a 2-D grid indexed [home_goals, away_goals]; its largest
    # entry is the predicted result.
    best_cell = A.argmax()
    home_goals, away_goals = np.unravel_index(best_cell, A.shape)
    print(home_team, away_team, home_goals, away_goals)

Croatia Albania 1 0
Germany Hungary 2 0
Scotland Switzerland 0 1
Slovenia Serbia 0 1
Denmark England 0 1
Spain Italy 1 0
Slovakia Ukraine 0 1
Poland Austria 1 1
Netherlands France 1 1
Georgia Czech Republic 0 1
Turkey Portugal 0 2
Belgium Romania 1 0


In [74]:
# Fixtures of the third group-stage round; the prediction loop below reads the
# home_team, away_team and location columns of each row.
day3_matches = pd.read_csv("matches/EURO_2024/pre_day_3.csv")

In [75]:
# Print the single most probable scoreline for every fixture of round 3.
for _, match in day3_matches.iterrows():
    home_team, away_team = match.home_team, match.away_team
    # Position of each team in `countries` selects its column of posterior draws.
    home_idx, away_idx = countries.index(home_team), countries.index(away_team)
    A = predict_outcomes_dixon_coles(
        attack_home=chains["attack"][:, home_idx],
        attack_away=chains["attack"][:, away_idx],
        defense_home=chains["defense"][:, home_idx],
        defense_away=chains["defense"][:, away_idx],
        intercept=intercept,
        home_advantage=home_advantage,
        rho=rho,
        home_team=home_team,
        away_team=away_team,
        location=match.location,
    )
    # A is read as a 2-D grid indexed [home_goals, away_goals]; its largest
    # entry is the predicted result.
    best_cell = A.argmax()
    home_goals, away_goals = np.unravel_index(best_cell, A.shape)
    print(home_team, away_team, home_goals, away_goals)

Switzerland Germany 1 1
Scotland Hungary 0 0
Albania Spain 0 2
Croatia Italy 0 1
France Poland 1 0
Netherlands Austria 1 0
Denmark Serbia 1 0
England Slovenia 1 0
Slovakia Romania 1 1
Ukraine Belgium 0 1
Georgia Portugal 0 2
Czech Republic Turkey 1 1


In [76]:
# Every team that appears in any group-stage fixture. Both the home and the away
# column are collected: taking only home_team would silently drop a team that
# never happens to be listed as the home side.
all_countries = set()
for fixtures in (day1_matches, day2_matches, day3_matches):
    all_countries |= set(fixtures.home_team) | set(fixtures.away_team)

In [78]:
# Check how the teams starting with "T" are spelled in the fixture files, so the
# hand-written group lists below use the same names.
[team for team in all_countries if team.startswith("T")]

['Turkey']

In [79]:
# EURO 2024 group stage: group letter -> its four teams.
# The names must match the entries of `countries`, because they are looked up
# with countries.index(). The order inside a group matters as well: the
# simulation pairs teams with combinations(), so the team listed first is
# treated as the "home" side of that pairing.
groups = {
    "A": ["Germany", "Scotland", "Hungary", "Switzerland"],
    "B": ["Spain", "Croatia", "Italy", "Albania"],
    "C": ["Slovenia", "Denmark", "Serbia", "England"],
    "D": ["Netherlands", "France", "Poland", "Austria"],
    "E": ["Ukraine", "Slovakia", "Belgium", "Romania"],
    "F": ["Portugal", "Czech Republic", "Georgia", "Turkey"],
}

In [117]:
def predict_outcome_matrix(
    home_team,
    away_team,
    all_countries,
    chains,
    location="Germany"
):
    """Return the scoreline matrix for one fixture.

    Parameters
    ----------
    home_team, away_team : str
        Team names; each must be an entry of the module-level ``countries``
        sequence, whose positions select the team's column in
        ``chains["attack"]`` and ``chains["defense"]``.
    all_countries :
        Unused. Kept only so existing callers that pass it keep working.
    chains :
        Mapping of posterior draws with the keys ``"attack"``, ``"defense"``,
        ``"home_advantage"``, ``"intercept"`` and ``"rho"``.
    location : str, optional
        Venue passed on to ``predict_outcomes_dixon_coles``. Defaults to
        ``"Germany"``, the value that used to be hard-coded here.

    Returns
    -------
    Whatever ``predict_outcomes_dixon_coles`` returns; callers treat it as a
    2-D array indexed ``[home_goals, away_goals]``.

    Raises
    ------
    ValueError
        If a team is not in ``countries`` (assuming ``countries`` is a list —
        the error comes from its ``.index`` method).
    """
    # NOTE(review): the team -> column lookup uses the global `countries`, not
    # the `all_countries` argument.
    home_idx = countries.index(home_team)
    away_idx = countries.index(away_team)
    return predict_outcomes_dixon_coles(
        attack_home=chains["attack"][:, home_idx],
        attack_away=chains["attack"][:, away_idx],
        defense_home=chains["defense"][:, home_idx],
        defense_away=chains["defense"][:, away_idx],
        intercept=chains["intercept"],
        home_advantage=chains["home_advantage"],
        rho=chains["rho"],
        home_team=home_team,
        away_team=away_team,
        location=location,
    )

In [118]:
def get_most_likely_outcome(
    home_team,
    away_team,
    all_countries,
    chains
):
    """Return ``(home_goals, away_goals)`` of the most probable scoreline.

    The fixture's outcome matrix is taken from ``predict_outcome_matrix``; the
    position of its largest entry is the predicted result.
    """
    outcome_probs = predict_outcome_matrix(home_team, away_team, all_countries, chains)
    # argmax() gives a flat position; turn it back into (row, column).
    modal_cell = outcome_probs.argmax()
    home_goals, away_goals = np.unravel_index(modal_cell, outcome_probs.shape)
    return home_goals, away_goals

In [82]:
from itertools import combinations
from collections import defaultdict

In [88]:
# NOTE(review): `points` is only built by the group-stage loop in the cell
# after this one, so this inspection cell fails on a fresh kernel (its saved
# output comes from an earlier, partial run) — move it below that loop or delete it.
points

{'A': defaultdict(int, {})}

In [253]:
# Play every group as a single round-robin using the most probable scoreline of
# each pairing, and collect points and goals for/against per team.
points, goals_made, goals_received = {}, {}, {}
for group, members in groups.items():
    print(group)
    group_points = defaultdict(int)
    group_goals_made = defaultdict(int)
    group_goals_received = defaultdict(int)
    # combinations() yields each pairing once; the team listed first in the
    # group is passed as the home side.
    for home_team, away_team in combinations(members, 2):
        home_goals, away_goals = get_most_likely_outcome(
            home_team=home_team,
            away_team=away_team,
            all_countries=all_countries,
            chains=chains,
        )
        print(home_team, away_team, home_goals, away_goals)

        group_goals_made[home_team] += home_goals
        group_goals_received[home_team] += away_goals
        group_goals_made[away_team] += away_goals
        group_goals_received[away_team] += home_goals

        # 3 points for a win, 1 each for a draw, 0 for a loss.
        if home_goals > away_goals:
            home_pts, away_pts = 3, 0
        elif away_goals > home_goals:
            home_pts, away_pts = 0, 3
        else:
            home_pts, away_pts = 1, 1
        # Adding 0 is deliberate: it creates the defaultdict entry, so a team
        # without any points still shows up in the standings.
        group_points[home_team] += home_pts
        group_points[away_team] += away_pts

    points[group] = group_points
    goals_made[group] = group_goals_made
    goals_received[group] = group_goals_received

A
Germany Scotland 2 0
Germany Hungary 2 0
Germany Switzerland 1 1
Scotland Hungary 0 0
Scotland Switzerland 0 1
Hungary Switzerland 0 1
B
Spain Croatia 1 0
Spain Italy 1 0
Spain Albania 2 0
Croatia Italy 0 1
Croatia Albania 1 0
Italy Albania 1 0
C
Slovenia Denmark 0 1
Slovenia Serbia 0 1
Slovenia England 0 1
Denmark Serbia 1 0
Denmark England 0 1
Serbia England 0 1
D
Netherlands France 1 1
Netherlands Poland 1 0
Netherlands Austria 1 0
France Poland 1 0
France Austria 1 0
Poland Austria 1 1
E
Ukraine Slovakia 1 0
Ukraine Belgium 0 1
Ukraine Romania 1 0
Slovakia Belgium 0 2
Slovakia Romania 1 1
Belgium Romania 1 0
F
Portugal Czech Republic 1 0
Portugal Georgia 2 0
Portugal Turkey 2 0
Czech Republic Georgia 1 0
Czech Republic Turkey 1 1
Georgia Turkey 1 1


In [220]:
# One standings table per group (index = team), ranked by points, then goal
# difference, then goals scored. Head-to-head tie-breakers are not applied.
group_dfs = {}
for group in groups:
    standings = pd.DataFrame(
        {
            "group": group,
            "points": points[group],
            "goals_made": goals_made[group],
            "goals_received": goals_received[group],
        }
    ).assign(goal_diff=lambda table: table.goals_made - table.goals_received)
    group_dfs[group] = standings.sort_values(
        by=["points", "goal_diff", "goals_made"], ascending=False
    )
# Row 2 of each sorted table is the third-placed team of that group.
thirds = pd.concat([table.iloc[[2]] for table in group_dfs.values()], axis=0)

In [222]:
# Show the sorted standings table of every group.
group_dfs

{'A':             group  points  goals_made  goals_received  goal_diff
 Germany         A       7           5               1          4
 Switzerland     A       7           3               1          2
 Scotland        A       1           0               3         -3
 Hungary         A       1           0               3         -3,
 'B':         group  points  goals_made  goals_received  goal_diff
 Spain       B       9           4               0          4
 Italy       B       6           2               1          1
 Croatia     B       3           1               2         -1
 Albania     B       0           0               4         -4,
 'C':          group  points  goals_made  goals_received  goal_diff
 England      C       9           3               0          3
 Denmark      C       6           2               1          1
 Serbia       C       3           1               2         -1
 Slovenia     C       0           0               3         -3,
 'D':             group  po

In [223]:
# Rank the six third-placed teams by points, then goal difference, then goals
# scored (all descending). The result is only displayed; `thirds` is not modified.
thirds.sort_values(by=["points", "goal_diff", "goals_made"], ascending=False)

Unnamed: 0,group,points,goals_made,goals_received,goal_diff
Croatia,B,3,1,2,-1
Serbia,C,3,1,2,-1
Turkey,F,2,2,4,-2
Poland,D,1,1,3,-2
Romania,E,1,1,3,-2
Scotland,A,1,0,3,-3


In [227]:
# Round of 16. Group winners and runners-up are read from the sorted standings.
# Hard-coded by hand from the outputs above:
#   * "Poland", "Croatia", "Serbia", "Turkey" are the third-placed teams that
#     appear in the pairings (the top four rows of the thirds ranking, in which
#     Poland and Romania have identical records);
#   * "Netherlands" and "France" stand for group D, where the two are level on
#     points, goal difference and goals scored in the predicted results.
# NOTE(review): these names must be updated by hand whenever the fit changes.
group_winner = {name: table.index[0] for name, table in group_dfs.items()}
group_runner_up = {name: table.index[1] for name, table in group_dfs.items()}
round_of_sixteen_games = [
    (group_winner["B"], "Poland"),
    (group_winner["A"], group_runner_up["C"]),
    (group_winner["F"], "Croatia"),
    ("Netherlands", group_runner_up["E"]),
    (group_winner["E"], "Serbia"),
    ("France", group_runner_up["F"]),
    (group_runner_up["A"], group_runner_up["B"]),
    (group_winner["C"], "Turkey"),
]
round_of_sixteen_results = {}
for game_no, (home_team, away_team) in enumerate(round_of_sixteen_games, start=1):
    A = predict_outcome_matrix(home_team, away_team, all_countries, chains)
    # Rows are home goals and columns away goals, so the strictly lower triangle
    # holds home wins and the strictly upper triangle away wins; draws (the
    # diagonal) are ignored.
    p_home, p_away = np.tril(A, -1).sum(), np.triu(A, 1).sum()
    if p_home > p_away:
        round_of_sixteen_results[game_no] = home_team
    else:
        round_of_sixteen_results[game_no] = away_team

In [228]:
# Winners of the round of 16, keyed by game number (1-8).
round_of_sixteen_results

{1: 'Spain',
 2: 'Germany',
 3: 'Portugal',
 4: 'Netherlands',
 5: 'Belgium',
 6: 'France',
 7: 'Italy',
 8: 'England'}

In [229]:
# Quarter-finals: the winners of round-of-16 games (1, 2), (3, 4), (5, 6) and
# (7, 8) meet; results are keyed 1-4.
quarter_final_results = {}
for c, i in enumerate(range(1, 9, 2), start=1):
    home_team, away_team = round_of_sixteen_results[i], round_of_sixteen_results[i + 1]
    A = predict_outcome_matrix(home_team, away_team, all_countries, chains)
    # Strictly lower triangle = home win, strictly upper triangle = away win.
    p_home, p_away = np.tril(A, -1).sum(), np.triu(A, 1).sum()
    if p_home > p_away:
        quarter_final_results[c] = home_team
    else:
        quarter_final_results[c] = away_team

In [230]:
# Semi-finals: the winners of quarter-finals (1, 2) and (3, 4) meet; results are
# keyed 1-2.
semi_final_results = {}
for c, i in enumerate(range(1, 4, 2), start=1):
    home_team, away_team = quarter_final_results[i], quarter_final_results[i + 1]
    A = predict_outcome_matrix(home_team, away_team, all_countries, chains)
    # Strictly lower triangle = home win, strictly upper triangle = away win.
    p_home, p_away = np.tril(A, -1).sum(), np.triu(A, 1).sum()
    if p_home > p_away:
        semi_final_results[c] = home_team
    else:
        semi_final_results[c] = away_team

In [234]:
# Winners of the round of 16 again, for comparison with the later rounds
# (NOTE(review): duplicates the display cell right after the round-of-16 loop).
round_of_sixteen_results

{1: 'Spain',
 2: 'Germany',
 3: 'Portugal',
 4: 'Netherlands',
 5: 'Belgium',
 6: 'France',
 7: 'Italy',
 8: 'England'}

In [231]:
# Winners of the quarter-finals, keyed 1-4.
quarter_final_results

{1: 'Spain', 2: 'Portugal', 3: 'Belgium', 4: 'England'}

In [232]:
# Winners of the semi-finals, keyed 1-2: these two teams contest the final.
semi_final_results

{1: 'Spain', 2: 'England'}

In [233]:
# Final: played between the two semi-final winners. Reading the finalists from
# semi_final_results (instead of typing the names in) keeps this cell consistent
# with the bracket computed above.
finalist_home = semi_final_results[1]
finalist_away = semi_final_results[2]
A = predict_outcome_matrix(finalist_home, finalist_away, all_countries, chains)
# Strictly lower triangle = home win, strictly upper triangle = away win; draws
# (the diagonal) are ignored.
p_home = np.tril(A, -1).sum()
p_away = np.triu(A, 1).sum()
winner = finalist_home if p_home > p_away else finalist_away
print("EURO 2024 Winner: " + winner)

EURO 2024 Winner: Spain
