# Predictive Model 

This notebook contains the code to model the probabilities of the game results.

In [10]:
import pandas as pd 
import numpy as np
from scipy.stats import poisson
import warnings
from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

In [2]:
df = pd.read_csv("results_prepared")
df = df.drop('Unnamed: 0', axis=1)
df = df.drop(['matchweek','home_points','away_points','index'],axis=1)

In [3]:
# sum of goals in home games 
df_home = df.groupby(by=['home_team']).sum()
df_home = df_home.rename(columns={'home_goals': 'Goals_For', 'away_goals': 'Goals_Against'}) # rename columns

# sum of goals in away games 
df_away = df.groupby(by=['away_team']).sum()
df_away = df_away.rename(columns={'home_goals': 'Goals_Against', 'away_goals': 'Goals_For'}) # rename columns

# avg goals in home and away games 
df_home['AVG_Goals_For'] = df_home['Goals_For']/17
df_away['AVG_Goals_For'] = df_away['Goals_For']/17
df_home['AVG_Goals_Against'] = df_home['Goals_Against']/17
df_away['AVG_Goals_Against'] = df_away['Goals_Against']/17

# average league goals 
df_home['League_AVG_Goals_For'] = df_home['AVG_Goals_For'].sum() / 18
df_away['League_AVG_Goals_For'] = df_away['AVG_Goals_For'].sum() / 18
df_home['League_AVG_Goals_Against'] = df_home['AVG_Goals_Against'].sum() / 18
df_away['League_AVG_Goals_Against'] = df_away['AVG_Goals_Against'].sum() / 18
df_home['Team'] = df_home.index
df_away['Team'] = df_away.index

In [4]:
# calculate attacking & defensive strenth of each team 
df_strengths = pd.DataFrame()
df_strengths['Attacking_Strength_H'] = df_home['AVG_Goals_For'] / (df_home['AVG_Goals_For'].sum() / 18)
df_strengths['Defensive_Strength_H'] = df_home['AVG_Goals_Against'] / (df_home['AVG_Goals_Against'].sum() / 18)
df_strengths['Attacking_Strength_A'] = df_away['AVG_Goals_For'] / (df_away['AVG_Goals_For'].sum() / 18)
df_strengths['Defensive_Strength_A'] = df_away['AVG_Goals_Against'] / (df_away['AVG_Goals_Against'].sum() / 18)

# reformat dataframe
df_strengths = df_strengths.reset_index()
df_strengths = df_strengths.rename(columns={'home_team': 'Team'}) # rename columns

In [5]:
# clean df 
df = df.drop(['home_goals','away_goals'],axis=1)

In [6]:
# merge dataframe with games and team strengths
games_helper = pd.merge(df, df_strengths,  how='left', left_on=['home_team'], right_on = ['Team'])
games_strength = pd.merge(games_helper, df_strengths,  how='left', left_on=['away_team'], right_on = ['Team'])

In [7]:
# calculate goal expectancy
games_strength['home_goal_expect'] = games_strength['Attacking_Strength_H_x'] * games_strength['Defensive_Strength_A_y'] * float(df_home['League_AVG_Goals_For'][df_home['Team']=='Borussia Dortmund '])
games_strength['away_goal_expect'] = games_strength['Attacking_Strength_A_y'] * games_strength['Defensive_Strength_H_x'] * float(df_away['League_AVG_Goals_For'][df_away['Team']=='Borussia Dortmund '])

In [11]:
games_final = games_strength[['home_team','away_team','home_team_number','away_team_number','home_goal_expect','away_goal_expect','game_number']]
games_final

Unnamed: 0,home_team,away_team,home_team_number,away_team_number,home_goal_expect,away_goal_expect,game_number
0,1. FC Köln,Hertha BSC,0.0,12.0,2.015497,0.696527,0
1,1. FC Union Berlin,Bayer 04 Leverkusen,1.0,9.0,1.178653,1.778313,1
2,1. FSV Mainz 05,RB Leipzig,2.0,13.0,1.296519,0.813891,2
3,Arminia Bielefeld,SC Freiburg,3.0,14.0,0.618793,1.525726,3
4,Bor. Mönchengladbach,Bayern München,4.0,10.0,1.426170,3.375478,4
...,...,...,...,...,...,...,...
301,Arminia Bielefeld,RB Leipzig,3.0,13.0,0.589327,1.701772,301
302,Bayer 04 Leverkusen,SC Freiburg,9.0,14.0,1.608862,1.525726,302
303,FC Augsburg,SpVgg Greuther Fürth,6.0,15.0,2.406417,0.862367,303
304,Bor. Mönchengladbach,TSG Hoffenheim,4.0,16.0,2.333733,1.653296,304


In [12]:
# calculate probabilities for draw, home win and away win  
games_final['draw'] = (poisson.pmf(k=0, mu=games_final['home_goal_expect']) * poisson.pmf(k=0, mu=games_final['away_goal_expect'])) + (poisson.pmf(k=1, mu=games_final['home_goal_expect']) * poisson.pmf(k=1, mu=games_final['away_goal_expect'])) + (poisson.pmf(k=2, mu=games_final['home_goal_expect']) * poisson.pmf(k=2, mu=games_final['away_goal_expect'])) + (poisson.pmf(k=3, mu=games_final['home_goal_expect']) * poisson.pmf(k=3, mu=games_final['away_goal_expect'])) + (poisson.pmf(k=4, mu=games_final['home_goal_expect']) * poisson.pmf(k=4, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=5, mu=games_final['home_goal_expect']) * poisson.pmf(k=5, mu=games_final['away_goal_expect']))
games_final['home_win'] = (poisson.pmf(k=1, mu=games_final['home_goal_expect']) * poisson.pmf(k=0, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=2, mu=games_final['home_goal_expect']) * poisson.pmf(k=0, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=2, mu=games_final['home_goal_expect']) * poisson.pmf(k=1, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=3, mu=games_final['home_goal_expect']) * poisson.pmf(k=0, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=3, mu=games_final['home_goal_expect']) * poisson.pmf(k=1, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=3, mu=games_final['home_goal_expect']) * poisson.pmf(k=2, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=4, mu=games_final['home_goal_expect']) * poisson.pmf(k=0, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=4, mu=games_final['home_goal_expect']) * poisson.pmf(k=1, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=4, mu=games_final['home_goal_expect']) * poisson.pmf(k=2, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=4, mu=games_final['home_goal_expect']) * poisson.pmf(k=3, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=5, mu=games_final['home_goal_expect']) * poisson.pmf(k=0, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=5, mu=games_final['home_goal_expect']) * poisson.pmf(k=1, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=5, mu=games_final['home_goal_expect']) * poisson.pmf(k=2, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=5, mu=games_final['home_goal_expect']) * poisson.pmf(k=3, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=5, mu=games_final['home_goal_expect']) * poisson.pmf(k=4, mu=games_final['away_goal_expect']))
games_final['away_win'] = (poisson.pmf(k=0, mu=games_final['home_goal_expect']) * poisson.pmf(k=1, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=0, mu=games_final['home_goal_expect']) * poisson.pmf(k=2, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=1, mu=games_final['home_goal_expect']) * poisson.pmf(k=2, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=0, mu=games_final['home_goal_expect']) * poisson.pmf(k=3, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=1, mu=games_final['home_goal_expect']) * poisson.pmf(k=3, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=2, mu=games_final['home_goal_expect']) * poisson.pmf(k=3, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=0, mu=games_final['home_goal_expect']) * poisson.pmf(k=4, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=1, mu=games_final['home_goal_expect']) * poisson.pmf(k=4, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=2, mu=games_final['home_goal_expect']) * poisson.pmf(k=4, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=3, mu=games_final['home_goal_expect']) * poisson.pmf(k=4, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=0, mu=games_final['home_goal_expect']) * poisson.pmf(k=5, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=1, mu=games_final['home_goal_expect']) * poisson.pmf(k=5, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=2, mu=games_final['home_goal_expect']) * poisson.pmf(k=5, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=3, mu=games_final['home_goal_expect']) * poisson.pmf(k=5, mu=games_final['away_goal_expect']))+ (poisson.pmf(k=4, mu=games_final['home_goal_expect']) * poisson.pmf(k=5, mu=games_final['away_goal_expect']))
games_final

Unnamed: 0,home_team,away_team,home_team_number,away_team_number,home_goal_expect,away_goal_expect,game_number,draw,home_win,away_win
0,1. FC Köln,Hertha BSC,0.0,12.0,2.015497,0.696527,0,0.197913,0.666356,0.118515
1,1. FC Union Berlin,Bayer 04 Leverkusen,1.0,9.0,1.178653,1.778313,1,0.233189,0.251174,0.504455
2,1. FSV Mainz 05,RB Leipzig,2.0,13.0,1.296519,0.813891,2,0.287032,0.476531,0.234034
3,Arminia Bielefeld,SC Freiburg,3.0,14.0,0.618793,1.525726,3,0.256707,0.149640,0.588777
4,Bor. Mönchengladbach,Bayern München,4.0,10.0,1.426170,3.375478,4,0.129969,0.120475,0.620122
...,...,...,...,...,...,...,...,...,...,...
301,Arminia Bielefeld,RB Leipzig,3.0,13.0,0.589327,1.701772,301,0.231059,0.124238,0.636630
302,Bayer 04 Leverkusen,SC Freiburg,9.0,14.0,1.608862,1.525726,302,0.236785,0.393345,0.358871
303,FC Augsburg,SpVgg Greuther Fürth,6.0,15.0,2.406417,0.862367,303,0.168762,0.679189,0.115724
304,Bor. Mönchengladbach,TSG Hoffenheim,4.0,16.0,2.333733,1.653296,304,0.197046,0.499258,0.265059


In [11]:
games_final.to_csv('games_probs')