In [1]:
import pandas as pd
import glob
import numpy as np
from scipy.optimize import minimize
import os
from scipy.stats import poisson
from collections import Counter

In [2]:
path =r'..\\data'
epl1718 = pd.read_csv(path+"\\football-data EPL 17-18.csv")
dataset = epl1718
dataset.head()

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,...,BbAv<2.5,BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
0,E0,11/08/2017,Arsenal,Leicester,4,3,H,2,2,D,...,2.32,21,-1.0,1.91,1.85,2.1,2.02,1.49,4.73,7.25
1,E0,12/08/2017,Brighton,Man City,0,2,A,0,0,D,...,2.27,20,1.5,1.95,1.91,2.01,1.96,11.75,6.15,1.29
2,E0,12/08/2017,Chelsea,Burnley,2,3,A,0,3,A,...,2.23,20,-1.75,2.03,1.97,1.95,1.9,1.33,5.4,12.25
3,E0,12/08/2017,Crystal Palace,Huddersfield,0,3,A,0,2,A,...,1.72,18,-0.75,2.1,2.05,1.86,1.83,1.79,3.56,5.51
4,E0,12/08/2017,Everton,Stoke,1,0,H,1,0,H,...,1.76,19,-0.75,1.94,1.9,2.01,1.98,1.82,3.49,5.42


In [3]:
teams = dataset['HomeTeam'].unique()
n_teams = len(teams)
n_teams

20

Model Specification: (Home Team Attack, Away Team Defense, Home Team Attack, Away Team Defense)

# Model 4: (alpha_i, beta_i, gamma_i, delta_i)

Every team has 4 unique parameters:
   * Attack at home
   * Attack away
   * Defense at home
   * Defense away

In [141]:
init_vals = np.concatenate((
                              np.random.uniform(0,1,(n_teams)),  # attack home strength
                              np.random.uniform(0,-1,(n_teams)),  # defence away strength
                              np.random.uniform(0,1,(n_teams)),  # attack away strength
                              np.random.uniform(0,-1,(n_teams))  # defense home strength
                             ))

In [142]:
def match_log_likelihood(x_k, y_k, alpha_ik, beta_jk, alpha_jk, beta_ik):
    lambda_k = np.exp(alpha_ik + beta_jk)
    mu_k = np.exp(alpha_jk + beta_ik)
    return (
        np.log(poisson.pmf(x_k, lambda_k))
        + np.log(poisson.pmf(y_k, mu_k))
    )

In [148]:
def log_likelhood(params):
    score_h_coefs = dict(zip(teams, params[:n_teams]))
    defend_a_coefs = dict(zip(teams, params[n_teams:(2*n_teams)]))
    score_a_coefs = dict(zip(teams, params[(2*n_teams):(3*n_teams)]))
    defend_h_coefs = dict(zip(teams, params[(3*n_teams):]))
    
    log_like = [match_log_likelihood(row.FTHG, row.FTAG, score_h_coefs[row.HomeTeam], defend_a_coefs[row.AwayTeam], 
                                     score_a_coefs[row.AwayTeam], defend_h_coefs[row.HomeTeam]) 
                for row in dataset.itertuples()]
    
    return -sum(log_like)

In [149]:
def equality_constraint_1(params):
    return (sum(params[:n_teams]) - sum(params[n_teams:(2*n_teams)]))

In [150]:
def equality_constraint_2(params):
    return (sum(params[(2*n_teams):(3*n_teams)]) - sum(params[(3*n_teams):]))

In [151]:
opt_params_m4 = minimize(log_likelhood, init_vals, constraints=[{'type':'eq', 'fun': equality_constraint_1}, 
                                                                                     {'type':'eq', 'fun': equality_constraint_2}], options={'disp': True, 'maxiter':100})

  np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))


Iteration limit reached    (Exit mode 9)
            Current function value: 1032.965843945449
            Iterations: 100
            Function evaluations: 8219
            Gradient evaluations: 100


In [153]:
parameters_m4 = dict(zip(["attack_h_"+team for team in teams] + 
                        ["defence_a_"+team for team in teams] +
                        ['attack_a_'+team for team in teams] +
                         ['defense_h_'+team for team in teams],
                        opt_params_m4.x))

In [154]:
Model_4_ll = opt_params_m4["fun"]
print(f"Model 1b Log-Likelihood: {Model_4_ll}")

Model 1b Log-Likelihood: 1032.965843945449


Model Specification: (Home Team Attack, Away Team Defense, Home Team Attack, Away Team Defense)

# Model 2: (alpha_i, beta_i, kappa\*beta_i, kappa\*alpha_i)

**Every team has 2 unique parameters:**
* Attack
* Defense  

**1 Global parameter:**
* kappa - home adv same for every team

In [121]:
init_vals = np.concatenate((
                              np.random.uniform(0,1,(n_teams)),  # attack strength
                              np.random.uniform(0,-1,(n_teams)),  # defence strength
                              [1.]  # Home effect
                             ))

In [122]:
def match_log_likelihood(x_k, y_k, alpha_ik, beta_jk, alpha_jk, beta_ik, kappa):
    lambda_k = np.exp(alpha_ik + beta_jk + kappa)
    mu_k = np.exp(alpha_jk + beta_ik)
    
    return (
        np.log(poisson.pmf(x_k, lambda_k))
        + np.log(poisson.pmf(y_k, mu_k))
    )

In [123]:
def log_likelhood(params):
    score_coefs = dict(zip(teams, params[:n_teams]))
    defend_coefs = dict(zip(teams, params[n_teams:(2*n_teams)]))
    kappa = params[-1]
    
    log_like = [match_log_likelihood(row.FTHG, row.FTAG, score_coefs[row.HomeTeam], defend_coefs[row.AwayTeam], 
                                     score_coefs[row.AwayTeam], defend_coefs[row.HomeTeam], kappa) 
                for row in dataset.itertuples()]
    
    return -sum(log_like)

In [124]:
def equality_constraint(params):
    return (sum(params[:n_teams]) - sum(params[n_teams:-1]))

In [125]:
opt_params_m2 = minimize(log_likelhood, init_vals, constraints={'type':'eq', 'fun': equality_constraint}, options={'disp': True, 'maxiter':100})

  np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 1052.3376822815933
            Iterations: 53
            Function evaluations: 2309
            Gradient evaluations: 53


In [126]:
parameters_m2 = dict(zip(["attack_"+team for team in teams] + 
                        ["defence_"+team for team in teams] +
                        ['home_adv'],
                        opt_params_m2.x))

In [127]:
Model_2_ll = opt_params_m2["fun"]
print(f"Model 2 Log-Likelihood: {Model_2_ll}")

Model 2 Log-Likelihood: 1052.3376822815933


# Model 0: (alpha, beta, gamma, delta)

**4 Global parameters:**
* Attack when at home
* Defense when at home
* Attack when away
* Defense when at home

In [99]:
init_vals = [1.,  # Home Attack
             -1.,  # Home Defense
             1.,  # Away Attack
             -1.]  # Away Defense

In [100]:
def match_log_likelihood(x_k, y_k, alpha_ik, beta_jk, alpha_jk, beta_ik):
    lambda_k = np.exp(alpha_ik + beta_jk)
    mu_k = np.exp(alpha_jk + beta_ik)
    return (
        np.log(poisson.pmf(x_k, lambda_k))
        + np.log(poisson.pmf(y_k, mu_k))
    )

In [101]:
def log_likelhood(params):
    home_attack = params[0]
    home_defense = params[1]
    away_attack = params[2]
    away_defense = params[3]    
    
    log_like = [match_log_likelihood(row.FTHG, row.FTAG, home_attack, away_defense, 
                                     away_attack, home_defense) 
                for row in dataset.itertuples()]
    
    return -sum(log_like)

In [102]:
def equality_constraint_1(params):
    return (params[0] - params[-1])

def equality_constraint_2(params):
    return (params[1] - params[2])

In [103]:
opt_params_m0 = minimize(log_likelhood, init_vals, 
                         constraints=[{'type':'eq', 'fun': equality_constraint_1}, {'type':'eq', 'fun': equality_constraint_2}], 
                         options={'disp': True, 'maxiter':100})

  np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 1155.5755005186918
            Iterations: 6
            Function evaluations: 36
            Gradient evaluations: 6


In [104]:
parameters = {}
for team in teams:
    parameters[f"attack_home_{team}"] = opt_params_m0.x[0]
    
for team in teams:
    parameters[f"defense_home_{team}"] = opt_params_m0.x[1]

for team in teams:
    parameters[f"attack_away_{team}"] = opt_params_m0.x[2]
    
for team in teams:
    parameters[f"defende_away_{team}"] = opt_params_m0.x[3]

In [105]:
Model_0_ll = opt_params_m0["fun"]
print(f"Model 0 Log-Likelihood: {Model_0_ll}")

Model 0 Log-Likelihood: 1155.5755005186918


# Model 1A: (alpha_i, beta, gamma, alpha_i)

**Every team has 1 unique parameters:**
* Attack  

**2 Global parameter:**
* Defense when away
* Defense at home

In [106]:
init_vals = np.concatenate((
                              np.random.uniform(0,1,(n_teams)),  # attack strength
                              [-1., -1.]  # defence strength at home, defence strength away
                             ))

In [107]:
def match_log_likelihood(x_k, y_k, alpha_ik, beta_jk, alpha_jk, beta_ik):
    lambda_k = np.exp(alpha_ik + beta_jk)
    mu_k = np.exp(alpha_jk + beta_ik)
    return (
        np.log(poisson.pmf(x_k, lambda_k))
        + np.log(poisson.pmf(y_k, mu_k))
    )

In [108]:
def log_likelhood(params):
    score_coefs = dict(zip(teams, params[:n_teams]))
    defend_h_coefs, defend_a_coefs = params[-2], params[-1]
    
    log_like = [match_log_likelihood(row.FTHG, row.FTAG, score_coefs[row.HomeTeam], defend_a_coefs, 
                                     score_coefs[row.AwayTeam], defend_h_coefs) 
                for row in dataset.itertuples()]
    
    return -sum(log_like)

In [109]:
def equality_constraint(params):
    return (sum(params[:n_teams]) - n_teams*params[-1])

In [110]:
opt_params_m1a = minimize(log_likelhood, init_vals, constraints={'type':'eq', 'fun': equality_constraint}, options={'disp': True, 'maxiter':100})

  np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 1080.1266123528198
            Iterations: 29
            Function evaluations: 709
            Gradient evaluations: 29


In [111]:
parameters = dict(zip(["attack_"+team for team in teams],
                        opt_params_m1a.x[:n_teams]))
for team in teams:
    parameters[f"defense_home_{team}"] = opt_params_m1a.x[-2]
    
for team in teams:
    parameters[f"defense_away_{team}"] = opt_params_m1a.x[-1]

In [112]:
Model_1a_ll = opt_params_m1a["fun"]
print(f"Model 1a Log-Likelihood: {Model_1a_ll}")

Model 1a Log-Likelihood: 1080.1266123528198


# Model 1B: (alpha, beta_i, beta_i, delta)

**Every team has 1 unique parameters:**
* Defense  

**2 Global parameters:**
* Attack when away
* Attack at home

In [37]:
init_vals = np.concatenate((
                              np.random.uniform(0,-1,(n_teams)),  # defense strength
                              [1., 1.]  # attack strength at home, attack strength away
                             ))

In [38]:
def match_log_likelihood(x_k, y_k, alpha_ik, beta_jk, alpha_jk, beta_ik):
    lambda_k = np.exp(alpha_ik + beta_jk)
    mu_k = np.exp(alpha_jk + beta_ik)
    return (
        np.log(poisson.pmf(x_k, lambda_k))
        + np.log(poisson.pmf(y_k, mu_k))
    )

In [39]:
def log_likelhood(params):
    defend_coefs = dict(zip(teams, params[:n_teams]))
    attack_h_coef, attack_a_coef = params[-2], params[-1]
    
    log_like = [match_log_likelihood(row.FTHG, row.FTAG, attack_h_coef, defend_coefs[row.HomeTeam], 
                                     attack_a_coef, defend_coefs[row.AwayTeam]) 
                for row in dataset.itertuples()]
    
    return -sum(log_like)

In [113]:
def equality_constraint(params):
    return (sum(params[:n_teams]) - n_teams*params[-2])

In [114]:
opt_params_m1b = minimize(log_likelhood, init_vals, constraints={'type':'eq', 'fun': equality_constraint}, options={'disp': True, 'maxiter':100})

  np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 1080.126612410209
            Iterations: 30
            Function evaluations: 733
            Gradient evaluations: 30


In [115]:
parameters = dict(zip(["defense_"+team for team in teams],
                        opt_params_m1a.x[:n_teams]))
for team in teams:
    parameters[f"attack_home_{team}"] = opt_params_m1a.x[-2]
    
for team in teams:
    parameters[f"attack_away_{team}"] = opt_params_m1a.x[-1]

In [116]:
Model_1b_ll = opt_params_m1b["fun"]
print(f"Model 1b Log-Likelihood: {Model_1b_ll}")

Model 1b Log-Likelihood: 1080.126612410209


# Model 3C: (alpha_i, beta_i, gamma_i, alpha_i)

**Every team has 3 unique parameters:**  
* Attack
* Defense at home  
* Defense away

In [44]:
init_vals = np.concatenate((
                              np.random.uniform(0,1,(n_teams)),  # attack strength
                              np.random.uniform(0,-1,(n_teams)),  # defence home strength
                              np.random.uniform(0,-1,(n_teams)),  # defence away strength
                             ))

In [45]:
def match_log_likelihood(x_k, y_k, alpha_ik, beta_jk, alpha_jk, beta_ik):
    lambda_k = np.exp(alpha_ik + beta_jk)
    mu_k = np.exp(alpha_jk + beta_ik)
    return (
        np.log(poisson.pmf(x_k, lambda_k))
        + np.log(poisson.pmf(y_k, mu_k))
    )

In [46]:
def log_likelhood(params):
    score_coefs = dict(zip(teams, params[:n_teams]))
    defend_h_coefs = dict(zip(teams, params[n_teams:(2*n_teams)]))
    defend_a_coefs = dict(zip(teams, params[(2*n_teams):]))
    
    log_like = [match_log_likelihood(row.FTHG, row.FTAG, score_coefs[row.HomeTeam], defend_a_coefs[row.AwayTeam], 
                                     score_coefs[row.AwayTeam], defend_h_coefs[row.HomeTeam]) 
                for row in dataset.itertuples()]
    
    return -sum(log_like)

In [47]:
def equality_constraint(params):
    return (sum(params[:n_teams]) - params[(2*n_teams):])

In [48]:
opt_params_m3c = minimize(log_likelhood, init_vals, constraints={'type':'eq', 'fun': equality_constraint}, options={'disp': True, 'maxiter':100})

  np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 1061.0918210038428
            Iterations: 54
            Function evaluations: 3358
            Gradient evaluations: 54


In [49]:
parameters_m3c = dict(zip(["attack_"+team for team in teams] + 
                        ["defence_h_"+team for team in teams] +
                        ['defence_a_'+team for team in teams],
                        opt_params_m3c.x))

In [50]:
Model_3c_ll = opt_params_m3c["fun"]
print(f"Model 1b Log-Likelihood: {Model_3c_ll}")

Model 1b Log-Likelihood: 1061.0918210038428


# Model 3D: (alpha_i, beta_i, beta_i, delta_i)

**Every team has 3 unique parameters:**  
* Defense
* Attack at home  
* Attack away

In [128]:
init_vals = np.concatenate((
                              np.random.uniform(0,-1,(n_teams)),  # defence strength
                              np.random.uniform(0,1,(n_teams)),  # attack home strength
                              np.random.uniform(0,1,(n_teams)),  # attack away strength
                             ))

In [129]:
def match_log_likelihood(x_k, y_k, alpha_ik, beta_jk, alpha_jk, beta_ik):
    lambda_k = np.exp(alpha_ik + beta_jk)
    mu_k = np.exp(alpha_jk + beta_ik)
    return (
        np.log(poisson.pmf(x_k, lambda_k))
        + np.log(poisson.pmf(y_k, mu_k))
    )

In [130]:
def log_likelhood(params):
    defend_coefs = dict(zip(teams, params[:n_teams]))
    attack_h_coefs = dict(zip(teams, params[n_teams:(2*n_teams)]))
    attack_a_coefs = dict(zip(teams, params[(2*n_teams):]))
    
    log_like = [match_log_likelihood(row.FTHG, row.FTAG, attack_h_coefs[row.HomeTeam], defend_coefs[row.AwayTeam], 
                                     attack_a_coefs[row.AwayTeam], defend_coefs[row.HomeTeam]) 
                for row in dataset.itertuples()]
    
    return -sum(log_like)

In [131]:
def equality_constraint(params):
    return (sum(params[:n_teams]) - params[n_teams:(2*n_teams)])

In [132]:
opt_params_m3d = minimize(log_likelhood, init_vals, constraints={'type':'eq', 'fun': equality_constraint}, options={'disp': True, 'maxiter':100})

  np.log(poisson.pmf(x_k, lambda_k))
  + np.log(poisson.pmf(y_k, mu_k))


Optimization terminated successfully    (Exit mode 0)
            Current function value: 1084.399235279352
            Iterations: 57
            Function evaluations: 3534
            Gradient evaluations: 57


In [133]:
parameters_m3d = dict(zip(["defense_"+team for team in teams] + 
                        ["attack_h_"+team for team in teams] +
                        ['attack_a_'+team for team in teams],
                        opt_params_m3d.x))

In [134]:
Model_3d_ll = opt_params_m3d["fun"]
print(f"Model 1b Log-Likelihood: {Model_3d_ll}")

Model 1b Log-Likelihood: 1084.399235279352


# 4 Goodness-of-fit-tests

Likelihood ratio test: difference between models log likelihood's is chi2(k=n-1) distributed

In [58]:
ll_increase_4_3d = Model_4_ll - Model_3d_ll
ll_increase_4_3c = Model_4_ll - Model_3c_ll
ll_increase_3d_2 = Model_3d_ll - Model_2_ll
ll_increase_3c_2 = Model_3c_ll - Model_2_ll
ll_increase_2_1b = Model_2_ll - Model_1b_ll
ll_increase_2_1a = Model_2_ll - Model_1a_ll
ll_increase_1b_0 = Model_1b_ll - Model_0_ll
ll_increase_1a_0 = Model_1a_ll - Model_0_ll

In [77]:
scipy.stats.chi2.ppf(0.95, n_teams-1)

30.14352720564616

In [98]:
scipy.stats.chi2.ppf(0.99, n_teams-1)

36.19086912927004

Model 2 is judged most appropriate and carried forward for chi-squared goodness-of-fit analysis

## Table 4.    Observed and expected frequencies of home and away scores

In [166]:
table_4_df = pd.DataFrame({"number_of_goals": [0, 1, 2, 3, ">=4"], 
                           "home_obs": [sum(dataset["FTHG"]==0), sum(dataset["FTHG"]==1), sum(dataset["FTHG"]==2), sum(dataset["FTHG"]==3), sum(dataset["FTHG"]>=4)], 
                           "home_exp": 0, 
                           "away_obs": [sum(dataset["FTAG"]==0), sum(dataset["FTAG"]==1), sum(dataset["FTAG"]==2), sum(dataset["FTAG"]==3), sum(dataset["FTAG"]>=4)], 
                           "away_exp": 0})

In [210]:
def calc_goals_pr(param_dict, homeTeam, awayTeam):
    means = [np.exp(param_dict['attack_'+homeTeam] + param_dict['defence_'+awayTeam] + param_dict['home_adv']),
     np.exp(param_dict['defence_'+homeTeam] + param_dict['attack_'+awayTeam])]
    
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, 4)] for team_avg in means]
    
    for i in range(len(means)):
        team_pred[i].append(1 - poisson.cdf(3, means[i]))
    
    return np.array(team_pred)

In [220]:
sum_array = np.zeros((2, 5))
for row in dataset.itertuples():
    sum_array += calc_goals_pr(parameters_m2, row.HomeTeam, row.AwayTeam)

In [222]:
table_4_df["home_exp"] = sum_array[0]
table_4_df["away_exp"] = sum_array[1]

In [227]:
table_4_df

Unnamed: 0,number_of_goals,home_obs,home_exp,away_obs,away_exp
0,0,90,100.402021,136,135.933295
1,1,126,118.079733,127,126.427083
2,2,91,81.412629,65,69.667116
3,3,35,44.021426,33,30.345152
4,>=4,38,36.084191,19,17.627354


### Chi-Squared goodness of fit test

In [230]:
table_5_df = table_4_df.copy()
table_5_df["home_t_stat"] = (table_5_df["home_obs"] - table_5_df["home_exp"])**2/table_5_df["home_exp"]
table_5_df["away_t_stat"] = (table_5_df["away_obs"] - table_5_df["away_exp"])**2/table_5_df["away_exp"]

In [240]:
home_t_stat = table_5_df["home_t_stat"].sum()
away_t_stat = table_5_df["away_t_stat"].sum()

In [244]:
home_p_val = 1-scipy.stats.chi2.cdf(home_t_stat, 3)
away_p_val = 1-scipy.stats.chi2.cdf(away_t_stat, 3)

0.883863312261404

Both p-values are > 0.05, so cannot reject that the observed goals come from an independent Poisson distribution at the 5% level

# 5 A bivariate Poisson model

## Table 6. 
**Observed and estimated frequencies for Z, the difference in the teams' scores, for (i) the independent Poisson model and (ii) the bivariate Possion with Q = 0.2**

In [424]:
dataset_1 = dataset.copy()
dataset_1["Z"] = dataset_1["FTHG"] - dataset_1["FTAG"]

In [425]:
table_6_df = pd.DataFrame({"Z": ["<=-3", -2, -1, 0, 1, 2, 3, 4,">=5"], 
                           "observed": [sum(dataset_1["Z"]<=-3), sum(dataset_1["Z"]==-2), 
                                        sum(dataset_1["Z"]==-1), sum(dataset_1["Z"]==0), 
                                        sum(dataset_1["Z"]==1), sum(dataset_1["Z"]==2), 
                                       sum(dataset_1["Z"]==3), sum(dataset_1["Z"]==4), 
                                       sum(dataset_1["Z"]>=5)]})

In [426]:
def calc_goals_pr_dict(param_dict, homeTeam, awayTeam, q=0):
    means = [np.exp(param_dict['attack_'+homeTeam] + param_dict['defence_'+awayTeam] + param_dict['home_adv']),
             np.exp(param_dict['defence_'+homeTeam] + param_dict['attack_'+awayTeam])]
    
    team_pred = []
    for mean in means:
        dicti = {}
        for i in range(0, 15):
            correction = q*np.sqrt(means[0]*means[1])
            expectation = mean - correction
            dicti[i] = poisson.pmf(i, expectation)
        
        team_pred.append(dicti)

    return team_pred

In [427]:
def get_goal_diff_pr(team_pred):
    out = {}
    for k_h, v_h in team_pred[0].items():
        for k_a, v_a in team_pred[1].items():
            diff = k_h - k_a
            if (diff <= -3):
                diff = "<=-3"
            elif (diff >= 5):
                diff = ">=5"
            
            if (out.get(diff, 0) == 0):
                out[diff] = v_h*v_a
            else:
                out[diff] += v_h*v_a
    return out

In [428]:
def add_to_dict(dic, dic_to_add):
    for k_b, v_b in dic_to_add.items():
        if dic.get(k_b, 0) == 0:
            dic[k_b] = v_b
        else:
            dic[k_b] += v_b

In [429]:
q0_sum_dict = {}
q200_sum_dict = {}
for row in dataset_1.itertuples():
    goals_dict = calc_goals_pr_dict(parameters_m2, row.HomeTeam, row.AwayTeam)
    goal_diff_dict = get_goal_diff_pr(goals_dict)
    add_to_dict(q0_sum_dict, goal_diff_dict)
    
    goals_dict = calc_goals_pr_dict(parameters_m2, row.HomeTeam, row.AwayTeam, 0.2)
    goal_diff_dict = get_goal_diff_pr(goals_dict)
    add_to_dict(q200_sum_dict, goal_diff_dict)

In [430]:
estimated_q0 = []
estimated_q200 = []
for row in table_6_df.itertuples():
    estimated_q0.append(q0_sum_dict[row.Z])
    estimated_q200.append(q200_sum_dict[row.Z])
    
table_6_df["estimated_q0"] = estimated_q0
table_6_df["estimated_q200"] = estimated_q200

In [431]:
table_6_df

Unnamed: 0,Z,observed,estimated_q0,estimated_q200
0,<=-3,29,23.452832,19.068829
1,-2,21,32.831202,29.945928
2,-1,58,62.2151,62.709352
3,0,99,87.917862,97.846938
4,1,83,76.100235,79.740538
5,2,41,48.852222,47.552976
6,3,27,26.335988,24.148715
7,4,12,12.741033,11.154098
8,>=5,10,9.553393,7.832575
