# ANALYSIS TEMPLATE
*This notebook walks through the theory and calculations my model currently uses to calculate the probability of each 1 x 2 result and a table of over / under goal projections. I will break the calculations out into utility functions stored in `../src/utils` for ease of use in future explorations. This notebook documents the calculations.*

### Instructions:

1. Ensure all cells in all notebooks in `/notebooks` directory have been run.
2. Define `home_side` and `away_side` in first cell
3. Click 'run all'
4. Probability for 1 x 2 and over / under can be viewed in the last cell
5. *Suggestion:* copy and paste the notebook to `../output/< matchday_directory >/` and rename the file. IMPORTANT: if you make a copy, check that the `read_csv` file paths in the first code cell still point to the `data/processed` directory, and update them if needed.



### Import League Data & Define Sides

Define 'home_side' and 'away_side'
Ensure correct output



In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.stats import poisson

# --- Locate the processed league tables -------------------------------------
# The template lives at `notebooks/03_analysis_template_notebook.ipynb`, where
# the tables are at '../data/processed/'. A copy stored under
# `output/< matchday_directory >/` sits one directory deeper and needs
# '../../data/processed/' instead. Rather than editing the paths by hand after
# every copy, try both locations and use the first that holds the home table.
# If neither exists, fall back to the first candidate so that `read_csv`
# raises its usual FileNotFoundError naming that path.
_PROCESSED_DIR_CANDIDATES = (
    Path('../../data/processed'),  # running from output/< matchday_directory >/
    Path('../data/processed'),     # running from notebooks/
)
processed_dir = next(
    (d for d in _PROCESSED_DIR_CANDIDATES if (d / 'home_table.csv').is_file()),
    _PROCESSED_DIR_CANDIDATES[0],
)
home_dataframe = pd.read_csv(processed_dir / 'home_table.csv')
away_dataframe = pd.read_csv(processed_dir / 'away_table.csv')

# !!! INPUT TEAMS HERE !!!
#
# Wrapped in quotes, e.g.
#
#     home_side = "Arsenal"
#     away_side = "Everton"
#
# If there is a ValueError:
#   (1) check spelling against the error message. Must be exact.
#   (2) check capitalization
#   (3) check that it is wrapped in quotes
home_side = "Arsenal"
away_side = "Everton"

# Validate input. Each side is checked against the table it is later looked up
# in: the home side in the home table, the away side in the away table.
clubs = set(home_dataframe['Team'].values)
away_clubs = set(away_dataframe['Team'].values)
if home_side not in clubs:
    raise ValueError(f"Invalid entry for 'home_side'. Expected one of {clubs}, but received '{home_side}'")
elif away_side not in away_clubs:
    raise ValueError(f"Invalid entry for 'away_side'. Expected one of {away_clubs}, but received '{away_side}'")
else:
    print(f"Ready to calculate probability for {home_side} v. {away_side}")

Ready to calculate probability for Arsenal v. Everton


### Calculate Projected Goals

In [None]:
def _team_gpm(table, team, column):
    """Return the value of `column` for the first row of `table` whose 'Team' equals `team`."""
    return table.loc[table["Team"] == team, column].values[0]


# 2. grab their gpm_scored and gpm_conceded
#    (home side from the home table, away side from the away table)
home_side_gpm_scored = _team_gpm(home_dataframe, home_side, "gpm_scored")
home_side_gpm_conceded = _team_gpm(home_dataframe, home_side, "gpm_conceded")

print(home_side_gpm_scored, home_side_gpm_conceded)

away_side_gpm_scored = _team_gpm(away_dataframe, away_side, "gpm_scored")
away_side_gpm_conceded = _team_gpm(away_dataframe, away_side, "gpm_conceded")

print(away_side_gpm_scored, away_side_gpm_conceded)

# 3. league-average goals per match scored / conceded by home sides.
#    Averaged over however many teams are in the table (no hard-coded league
#    size) and kept at full precision: these values are used as divisors and
#    multipliers below, so rounding them here would skew every projection.
#    They are rounded for display only.
epl_home_avg_gpm_scored = home_dataframe["gpm_scored"].mean()
epl_home_avg_gpm_conceded = home_dataframe["gpm_conceded"].mean()
print(round(epl_home_avg_gpm_scored, 2), round(epl_home_avg_gpm_conceded, 2))

# 4. create a ratio (attack and defense rating) of their gpm to the league average gpm
#    NOTE(review): the away side's ratings are divided by the *home* table
#    averages - presumably because every goal scored at home is a goal conceded
#    away (and vice versa), so the home averages double as the away baselines.
#    Confirm this holds for the way the tables are built.
home_attack_rating = home_side_gpm_scored / epl_home_avg_gpm_scored
away_defense_rating = away_side_gpm_conceded / epl_home_avg_gpm_scored
away_attack_rating = away_side_gpm_scored / epl_home_avg_gpm_conceded
home_defense_rating = home_side_gpm_conceded / epl_home_avg_gpm_conceded

print(home_attack_rating, away_defense_rating, away_attack_rating, home_defense_rating)

# home projected goals
home_projected_goals = home_attack_rating * away_defense_rating * epl_home_avg_gpm_scored
# away projected goals
away_projected_goals = away_attack_rating * home_defense_rating * epl_home_avg_gpm_conceded
print("\n ================== projected_goals ==================== \n")
print(f"HOME {home_side} projected goals:", home_projected_goals)
print(f"AWAY {away_side} projected goals: ", away_projected_goals)

2.4285714285714284 0.8571428571428571
0.7142857142857143 1.8571428571428568
1.63 1.38
1.4899211218229622 1.1393514460999121 0.5175983436853002 0.6211180124223603


HOME Arsenal projected goals: 2.766996369099786
AWAY Everton projected goals:  0.4436557231588288


### Poisson Distribution of Goal Probabilities

In [None]:
# Poisson probability of each side scoring 0 - 8 goals, using the projected
# goals from the previous cell as the Poisson mean.
goal_counts = range(9)
home_score_prob = np.array(
    [poisson.pmf(goals, home_projected_goals) for goals in goal_counts]
)
away_score_prob = np.array(
    [poisson.pmf(goals, away_projected_goals) for goals in goal_counts]
)

# Joint probability of every exact score line:
# rows index away goals, columns index home goals.
exact_score_prob = np.outer(away_score_prob, home_score_prob)
print(f"{home_side} v. {away_side}", exact_score_prob)

Arsenal v. Everton [[4.03303056e-02 1.11593809e-01 1.54389832e-01 1.42398702e-01
  9.85041728e-02 5.45121377e-02 2.51391478e-02 9.93713297e-03
  3.43700136e-03]
 [1.78927709e-02 4.95092321e-02 6.84959328e-02 6.31759991e-02
  4.37019400e-02 2.41846219e-02 1.11531268e-02 4.40866592e-03
  1.52484532e-03]
 [3.96911511e-03 1.09825271e-02 1.51943063e-02 1.40141968e-02
  9.69430790e-03 5.36482295e-03 2.47407427e-03 9.77964932e-04
  3.38253177e-04]
 [5.86973544e-04 1.62415367e-03 2.24701365e-03 2.07249287e-03
  1.43364506e-03 7.93378136e-04 3.65879070e-04 1.44626580e-04
  5.00226526e-05]
 [6.51035431e-05 1.80141267e-04 2.49225116e-04 2.29868331e-04
  1.59011209e-04 8.79966876e-05 4.05810859e-05 1.60411025e-05
  5.54820903e-06]
 [5.77671190e-06 1.59841408e-05 2.21140298e-05 2.03964801e-05
  1.41092466e-05 7.80804682e-06 3.60080620e-06 1.42334538e-06
  4.92298938e-07]
 [4.27145216e-07 1.18190926e-06 1.63516932e-06 1.50816919e-06
  1.04327467e-06 5.77347443e-07 2.66253046e-07 1.05245887e-07
  3.6

### Under Market

In [None]:
thresholds = [0.5, 1.5, 2.5, 3.5]

# Total goals (away + home) for every cell of the exact-score grid.
n_away_rows, n_home_cols = exact_score_prob.shape
total_goals = np.add.outer(np.arange(n_away_rows), np.arange(n_home_cols))

# Probability that the match finishes at or under each goal line.
sums = [exact_score_prob[total_goals <= line].sum() for line in thresholds]

under_market_df = pd.DataFrame({"Goals": thresholds, "Prob": sums})
under_market_df["Implied Odds"] = 1 / under_market_df["Prob"]
print("~  ~ under market ~  ~\n", f"{home_side} v. {away_side}\n", under_market_df)

~  ~ under market ~  ~
 Arsenal v. Everton
    Goals      Prob  Implied Odds
0    0.5  0.040330     24.795250
1    1.5  0.169817      5.888696
2    2.5  0.377685      2.647709
3    3.5  0.600149      1.666252


In [None]:
thresholds = [0.5, 1.5, 2.5, 3.5]

# Total goals (away + home) for every cell of the exact-score grid.
total_goals = np.add.outer(
    np.arange(exact_score_prob.shape[0]),
    np.arange(exact_score_prob.shape[1]),
)

# The score grid is truncated (one row / column per goal count), so summing the
# cells with `total >= t` silently drops every score line outside the grid and
# understates the "over" probability (over + under would not add up to 1).
# Every score line with `total < t` IS inside the grid as long as t does not
# exceed the grid size, so the complement of that mass is the correct value.
max_exact_threshold = min(exact_score_prob.shape)
if max(thresholds) > max_exact_threshold:
    raise ValueError(
        f"Goal thresholds up to {max(thresholds)} need a score grid of at least that size, "
        f"but the grid only covers {max_exact_threshold} goal counts per side"
    )

sums = [1 - exact_score_prob[total_goals < t].sum() for t in thresholds]

over_market_df = pd.DataFrame({
    "Goals": thresholds,
    "Prob": sums
})

over_market_df["Implied Odds"] = 1 / over_market_df["Prob"]
print("~  ~ over market ~  ~\n", f"{home_side} v. {away_side}\n", over_market_df)

~  ~ over market ~  ~
 Arsenal v. Everton
    Goals      Prob  Implied Odds
0    0.5  0.957419      1.044474
1    1.5  0.827933      1.207828
2    2.5  0.620065      1.612735
3    3.5  0.397600      2.515088


### 1 x 2 Probability

In [None]:
# Rebuild the exact-score grid: Poisson probability of 0 - 8 goals per side,
# rows indexed by away goals and columns by home goals.
goal_counts = range(9)
home_score_prob = np.array(
    [poisson.pmf(goals, home_projected_goals) for goals in goal_counts]
)
away_score_prob = np.array(
    [poisson.pmf(goals, away_projected_goals) for goals in goal_counts]
)
exact_score_prob = np.outer(away_score_prob, home_score_prob)

# Above the diagonal the column (home goals) exceeds the row (away goals),
# below it the away side has more goals, and the diagonal holds the draws.
home_win_prob = np.triu(exact_score_prob, k=1).sum()
away_win_prob = np.tril(exact_score_prob, k=-1).sum()
draw_prob = exact_score_prob.trace()

# prepare data in readable dataframe
result_df = pd.DataFrame(
    {
        "winner": [f"{home_side} (1)", "Draw (x)", f"{away_side} (2)"],
        "prob": [home_win_prob, draw_prob, away_win_prob],
    }
)
result_df['implied_odds'] = 1 / result_df['prob']
print("      ~  ~ moneyline market ~  ~\n", result_df)

      ~  ~ moneyline market ~  ~
         winner      prob  implied_odds
0  Arsenal (1)  0.852364      1.173208
1     Draw (x)  0.107273      9.321973
2  Everton (2)  0.038112     26.238393


# Full Results Summary:

In [None]:
# Final report: headline first, then each market table under its own banner.
print(f"CALCULATED PROBABILITIES FOR {home_side} v. {away_side}")

market_sections = (
    ("\n\n        ~ ~ moneyline market ~ ~\n\n", result_df),
    ("\n\n          ~ ~ over market ~ ~ \n\n", over_market_df),
    ("\n\n          ~ ~ under market ~ ~ \n\n", under_market_df),
)
for banner, market_table in market_sections:
    print(banner, market_table)

CALCULATED PROBABILITIES FOR Arsenal v. Everton


        ~ ~ moneyline market ~ ~

         winner      prob  implied_odds
0  Arsenal (1)  0.852364      1.173208
1     Draw (x)  0.107273      9.321973
2  Everton (2)  0.038112     26.238393


          ~ ~ over market ~ ~ 

    Goals      Prob  Implied Odds
0    0.5  0.957419      1.044474
1    1.5  0.827933      1.207828
2    2.5  0.620065      1.612735
3    3.5  0.397600      2.515088


          ~ ~ under market ~ ~ 

    Goals      Prob  Implied Odds
0    0.5  0.040330     24.795250
1    1.5  0.169817      5.888696
2    2.5  0.377685      2.647709
3    3.5  0.600149      1.666252
