# AFL Tipping Predictor Working Notebook
- Based off work completed by David Sheehan for Soccer
- By Glen Willis

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn
from scipy.stats import poisson, skellam
import os

In [2]:
# Folder holding the source CSV files. It defaults to the original author's location; set the
# AFL_SOURCE_DATA_DIR environment variable to run the notebook from another machine.
SOURCE_DATA_DIR = os.environ.get("AFL_SOURCE_DATA_DIR", r"C:\Users\Beefsports\Documents\GitHub\AFLTippingPredictor\SourceData")
# 2019 season fixture results (one row per game)
afl_results_2019 = pd.read_csv(os.path.join(SOURCE_DATA_DIR, "afl-2019-AUSEasternStandardTime_results.csv"))

In [3]:
# Preview the first rows of the raw 2019 results to check which columns were loaded
afl_results_2019.head()

Unnamed: 0,Round Number,Date,Location,Home Team,Away Team,Result
0,1,21/03/2019 19:25,MCG,Carlton,Richmond,64 - 97
1,1,22/03/2019 19:50,MCG,Collingwood,Geelong Cats,65 - 72
2,1,23/03/2019 13:45,MCG,Melbourne,Port Adelaide,61 - 87
3,1,23/03/2019 16:35,Adelaide Oval,Adelaide Crows,Hawthorn,55 - 87
4,1,23/03/2019 19:25,Marvel Stadium,Western Bulldogs,Sydney Swans,82 - 65


In [4]:
# Only the two team names and the result string are needed; give them names without spaces
score_column_names = {'Home Team': 'HomeTeam', 'Away Team': 'AwayTeam', 'Result': 'FullScore'}
afl_results_2019_scores = afl_results_2019[list(score_column_names)].rename(columns=score_column_names)

In [5]:
# Preview the trimmed frame: HomeTeam, AwayTeam and the combined FullScore string
afl_results_2019_scores.head()

Unnamed: 0,HomeTeam,AwayTeam,FullScore
0,Carlton,Richmond,64 - 97
1,Collingwood,Geelong Cats,65 - 72
2,Melbourne,Port Adelaide,61 - 87
3,Adelaide Crows,Hawthorn,55 - 87
4,Western Bulldogs,Sydney Swans,82 - 65


In [6]:
# Work on an explicit copy: pd.DataFrame(frame) does not copy its input by default, so adding
# columns to the result can also alter afl_results_2019_scores, which was already displayed above.
afl_results_2019_split_scores = afl_results_2019_scores.copy()
# Split FullScore (e.g. "64 - 97") into HomeScore and AwayScore columns using " - " as the delimiter.
# The two new columns still hold strings at this point.
afl_results_2019_split_scores[['HomeScore','AwayScore']] = afl_results_2019_split_scores['FullScore'].str.split(" - ", expand=True)
afl_results_2019_split_scores.head()

Unnamed: 0,HomeTeam,AwayTeam,FullScore,HomeScore,AwayScore
0,Carlton,Richmond,64 - 97,64,97
1,Collingwood,Geelong Cats,65 - 72,65,72
2,Melbourne,Port Adelaide,61 - 87,61,87
3,Adelaide Crows,Hawthorn,55 - 87,55,87
4,Western Bulldogs,Sydney Swans,82 - 65,82,65


In [7]:
# FullScore has been split into HomeScore and AwayScore, so the combined column can go
afl_results_2019_split_scores = afl_results_2019_split_scores.drop('FullScore', axis=1)
afl_results_2019_split_scores.head()

Unnamed: 0,HomeTeam,AwayTeam,HomeScore,AwayScore
0,Carlton,Richmond,64,97
1,Collingwood,Geelong Cats,65,72
2,Melbourne,Port Adelaide,61,87
3,Adelaide Crows,Hawthorn,55,87
4,Western Bulldogs,Sydney Swans,82,65


In [8]:
# The split leaves HomeScore and AwayScore as strings, which will not work with the poisson model,
# so convert both columns to numbers (anything that cannot be parsed becomes NaN)
for score_column in ('HomeScore', 'AwayScore'):
    afl_results_2019_split_scores[score_column] = pd.to_numeric(
        afl_results_2019_split_scores[score_column], errors='coerce')

# Optional sanity check: afl_results_2019_split_scores.applymap(np.isreal)

In [9]:
# Importing the tools required for the Poisson regression model - average points scored
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Reshape to one row per team per game: each game contributes a row seen from the home side
# (home=1) and a row seen from the away side (home=0), with the side's own score in `score`.
home_side_rows = (
    afl_results_2019_split_scores[['HomeTeam','AwayTeam','HomeScore']]
    .rename(columns={'HomeTeam':'team','AwayTeam':'opponent','HomeScore':'score'})
    .assign(home=1)
)
away_side_rows = (
    afl_results_2019_split_scores[['AwayTeam','HomeTeam','AwayScore']]
    .rename(columns={'AwayTeam':'team', 'HomeTeam':'opponent','AwayScore':'score'})
    .assign(home=0)
)
# Home rows first, then away rows; the original row labels are kept, so each label appears twice
afl_tipping_model_data = pd.concat([home_side_rows, away_side_rows])

afl_tipping_model_data.head()

Unnamed: 0,team,opponent,score,home
0,Carlton,Richmond,64,1
1,Collingwood,Geelong Cats,65,1
2,Melbourne,Port Adelaide,61,1
3,Adelaide Crows,Hawthorn,55,1
4,Western Bulldogs,Sydney Swans,82,1


In [10]:
# Make sure the stacked score column is numeric (values that cannot be parsed become NaN)
afl_tipping_model_data = afl_tipping_model_data.assign(
    score=pd.to_numeric(afl_tipping_model_data['score'], errors='coerce'))
afl_tipping_model_data.head()

Unnamed: 0,team,opponent,score,home
0,Carlton,Richmond,64,1
1,Collingwood,Geelong Cats,65,1
2,Melbourne,Port Adelaide,61,1
3,Adelaide Crows,Hawthorn,55,1
4,Western Bulldogs,Sydney Swans,82,1


In [11]:
# Poisson regression through the statsmodels generalised linear model (glm) interface.
# The R-style formula makes score the dependent variable and uses home (1 if the row is the
# home side, 0 otherwise), team and opponent as the input variables.
afl_2019_poisson_glm = smf.glm(
    formula="score ~ home + team + opponent",
    data=afl_tipping_model_data,
    family=sm.families.Poisson(),
)
# .fit() estimates the coefficients for the chosen family
afl_2019_poisson_model = afl_2019_poisson_glm.fit()
# Regression result summary for the fitted glm
afl_2019_poisson_model.summary()

0,1,2,3
Dep. Variable:,score,No. Observations:,414
Model:,GLM,Df Residuals:,378
Model Family:,Poisson,Df Model:,35
Link Function:,log,Scale:,1.0000
Method:,IRLS,Log-Likelihood:,-2334.0
Date:,"Thu, 12 Mar 2020",Deviance:,2109.1
Time:,08:43:48,Pearson chi2:,2.07e+03
No. Iterations:,4,Covariance Type:,nonrobust

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,4.3611,0.035,125.673,0.000,4.293,4.429
team[T.Brisbane Lions],0.1059,0.032,3.285,0.001,0.043,0.169
team[T.Carlton],-0.1043,0.035,-3.020,0.003,-0.172,-0.037
team[T.Collingwood],0.0384,0.033,1.172,0.241,-0.026,0.103
team[T.Essendon],-0.0339,0.034,-1.002,0.317,-0.100,0.032
team[T.Fremantle],-0.1190,0.035,-3.431,0.001,-0.187,-0.051
team[T.GWS Giants],0.0583,0.032,1.817,0.069,-0.005,0.121
team[T.Geelong Cats],0.0906,0.032,2.815,0.005,0.028,0.154
team[T.Gold Coast Suns],-0.2632,0.036,-7.242,0.000,-0.334,-0.192


- Values in the coef column above - analogous to the slopes in linear regression
- Take the exponent of a coefficient ($e^{\text{coef}}$) to get its multiplicative effect on the expected score
- Positive value implies higher score while values closer to zero represent more neutral effects
- Home has a coef of 0.0523 - captures the fact that home teams generally score more points than the away team - e.g. e^0.0523 = 1.05, so a team is expected to score about 1.05 times as many points at home as it would away
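- West Coast Eagles has a coef of 0.0899 and St Kilda has -0.0803, which means that, relative to the baseline team (Adelaide Crows, the one team without a coefficient of its own), the Eagles are better scorers and St Kilda are worse scorers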
- Opponent coef values (e.g. opponent[T.St Kilda]) penalise/reward teams based on the quality of the opposition
- Reflects defensive strength of each team
- In other words a team is expected to score less against Hawthorn (with a negative value of -0.1179)
- Against Gold Coast Suns the value is 0.2222 - a team is expected to score more

In [12]:
# Make a function to simulate any match

def simulate_match(tipping_model, homeTeam, awayTeam, max_score=250):
    """Build the joint score-probability matrix for a single match.

    Each side's score is treated as an independent Poisson variable whose mean is
    the score the fitted model predicts for that side.

    Parameters
    ----------
    tipping_model : object with a ``predict`` method
        Called with a DataFrame holding the columns ``team``, ``opponent`` and
        ``home``; it must return one expected score per row.
    homeTeam, awayTeam : str
        Team names, spelled the way the model knows them.
    max_score : int, default 250
        Highest score (inclusive) considered for either side.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(max_score + 1, max_score + 1)``. Entry ``[i, j]`` is the
        probability that the home team scores ``i`` and the away team scores ``j``:
        cells below the main diagonal are home wins, cells above it are away wins
        and the diagonal itself holds the draws.
    """
    # One two-row frame: the home side's view first (home=1), then the away side's view (home=0)
    fixture_rows = pd.DataFrame({'team': [homeTeam, awayTeam],
                                 'opponent': [awayTeam, homeTeam],
                                 'home': [1, 0]})
    home_score_avg, away_score_avg = np.asarray(tipping_model.predict(fixture_rows), dtype=float)

    # Evaluate each Poisson pmf over the whole score grid in one vectorised call
    # instead of once per score in a Python loop.
    possible_scores = np.arange(max_score + 1)
    home_score_probs = poisson.pmf(possible_scores, home_score_avg)
    away_score_probs = poisson.pmf(possible_scores, away_score_avg)

    # Rows are the home score, columns the away score
    return np.outer(home_score_probs, away_score_probs)

In [13]:
# Score-probability matrix for West Coast Eagles (home) against Western Bulldogs (away)
eaglesvsbulldogs = simulate_match(
    afl_2019_poisson_model,
    homeTeam='West Coast Eagles',
    awayTeam='Western Bulldogs',
)

In [14]:
# Chance of Eagles Win
# Rows of the matrix are the home (Eagles) score and columns the away (Bulldogs) score, so every
# cell below the main diagonal is an Eagles win.
# np.tril keeps the lower triangle; k=-1 starts it 1 below the main diagonal, leaving the draws out.
# Summing what is left gives the win probability.
np.tril(eaglesvsbulldogs, k=-1).sum()

0.7846209447843633

In [15]:
# Chance of draw
# The main diagonal holds the equal scores (0-0, 1-1, 2-2 etc), so its sum is the draw
# probability - i.e. not likely
np.diag(eaglesvsbulldogs).sum()

0.02138365094072478

In [16]:
# Chance of Bulldogs Win
# Cells above the main diagonal are the ones where the away (Bulldogs) score is the higher one.
# np.triu keeps the upper triangle; k=1 starts it 1 above the main diagonal, leaving the draws out.
# Summing what is left gives the win probability.
np.triu(eaglesvsbulldogs, k=1).sum()

0.19399540427491938

In [17]:
# Load test data
# Folder holding the source CSV files. It defaults to the original author's location; set the
# AFL_SOURCE_DATA_DIR environment variable to run the notebook from another machine.
SOURCE_DATA_DIR = os.environ.get("AFL_SOURCE_DATA_DIR", r"C:\Users\Beefsports\Documents\GitHub\AFLTippingPredictor\SourceData")
afl_results_2009_to_2018 = pd.read_csv(os.path.join(SOURCE_DATA_DIR, "AFL2009to2018.csv"), encoding = "ISO-8859-1")
# Independent working copy, so later steps cannot alter the raw frame that was just loaded
afl_results_2009_to_2018_results = afl_results_2009_to_2018.copy()


In [18]:
# Preview the first rows of the raw 2009-2018 data (scores, odds and other betting columns)
afl_results_2009_to_2018.head()

Unnamed: 0,Date,Kick Off (local),Home Team,Away Team,Venue,Home Score,Away Score,Play Off Game?,Home Goals,Home Behinds,...,Total Score Close,Total Score Over Open,Total Score Over Min,Total Score Over Max,Total Score Over Close,Total Score Under Open,Total Score Under Min,Total Score Under Max,Total Score Under Close,Notes
0,29-Sep-18,14:30,West Coast,Collingwood,MCG,79,74,Y,11,13,...,162.5,1.9,1.9,1.93,1.93,1.9,1.9,1.93,1.93,
1,22-Sep-18,13:20,West Coast,Melbourne,Optus Stadium,121,55,Y,18,13,...,173.5,1.9,1.9,1.9,1.9,1.9,1.9,1.9,1.9,
2,21-Sep-18,19:50,Richmond,Collingwood,MCG,58,97,Y,8,10,...,161.5,1.9,1.9,1.9,1.9,1.9,1.9,1.9,1.9,
3,15-Sep-18,19:25,Collingwood,GWS Giants,MCG,69,59,Y,9,15,...,150.5,1.9,1.9,1.9,1.9,1.9,1.9,1.9,1.9,
4,14-Sep-18,19:50,Hawthorn,Melbourne,MCG,71,104,Y,10,11,...,165.5,1.9,1.9,1.9,1.9,1.9,1.9,1.9,1.9,


In [19]:
# Keep the date, teams, final scores, head-to-head odds and the finals flag,
# and rename the columns that are referred to later
results_columns = ['Date', 'Home Team', 'Away Team', 'Home Score', 'Away Score', 'Home Odds', 'Away Odds', 'Play Off Game?']
results_renames = {'Home Team': 'HomeTeam', 'Away Team': 'AwayTeam', 'Play Off Game?':'FinalsGame'}
afl_results_2009_to_2018_results = afl_results_2009_to_2018.loc[:, results_columns].rename(columns=results_renames)
afl_results_2009_to_2018_results.head()

Unnamed: 0,Date,HomeTeam,AwayTeam,Home Score,Away Score,Home Odds,Away Odds,FinalsGame
0,29-Sep-18,West Coast,Collingwood,79,74,2.24,1.67,Y
1,22-Sep-18,West Coast,Melbourne,121,55,1.76,2.06,Y
2,21-Sep-18,Richmond,Collingwood,58,97,1.39,2.98,Y
3,15-Sep-18,Collingwood,GWS Giants,69,59,1.57,2.41,Y
4,14-Sep-18,Hawthorn,Melbourne,71,104,2.83,1.43,Y


In [20]:
# Footy tipping is only completed for the home and away season, so every finals game
# (FinalsGame == 'Y') is left out and the flag column is then no longer needed
is_home_and_away_game = afl_results_2009_to_2018_results['FinalsGame'].ne('Y')
afl_results_2009_to_2018_results = (
    afl_results_2009_to_2018_results
    .loc[is_home_and_away_game]
    .drop('FinalsGame', axis=1)
)
afl_results_2009_to_2018_results.head()

Unnamed: 0,Date,HomeTeam,AwayTeam,Home Score,Away Score,Home Odds,Away Odds
9,26-Aug-18,St Kilda,North Melbourne,94,117,3.15,1.35
10,26-Aug-18,Melbourne,GWS Giants,102,57,1.45,2.68
11,26-Aug-18,Brisbane,West Coast,72,98,2.2,1.66
12,25-Aug-18,Carlton,Adelaide,61,165,5.25,1.14
13,25-Aug-18,Sydney,Hawthorn,74,83,2.04,1.8


In [21]:
# Break the Date text (e.g. "26-Aug-18") on "-" into Day, Month and Year columns and discard Date.
# The three new columns hold text, not numbers.
date_parts = afl_results_2009_to_2018_results['Date'].str.split("-", expand=True)
date_parts.columns = ['Day', 'Month', 'Year']
afl_results_2009_to_2018_results = pd.concat(
    [afl_results_2009_to_2018_results.drop('Date', axis=1), date_parts], axis=1)
afl_results_2009_to_2018_results.head()

Unnamed: 0,HomeTeam,AwayTeam,Home Score,Away Score,Home Odds,Away Odds,Day,Month,Year
9,St Kilda,North Melbourne,94,117,3.15,1.35,26,Aug,18
10,Melbourne,GWS Giants,102,57,1.45,2.68,26,Aug,18
11,Brisbane,West Coast,72,98,2.2,1.66,26,Aug,18
12,Carlton,Adelaide,61,165,5.25,1.14,25,Aug,18
13,Sydney,Hawthorn,74,83,2.04,1.8,25,Aug,18


In [22]:
# Move Year, Month, Day to the front of the dataframe, keeping every other column in its current order
date_columns = ['Year', 'Month', 'Day']
remaining_columns = [col for col in afl_results_2009_to_2018_results.columns if col not in date_columns]
afl_results_2009_to_2018_results = afl_results_2009_to_2018_results[date_columns + remaining_columns]
afl_results_2009_to_2018_results.head()

Unnamed: 0,Year,Month,Day,HomeTeam,AwayTeam,Home Score,Away Score,Home Odds,Away Odds
9,18,Aug,26,St Kilda,North Melbourne,94,117,3.15,1.35
10,18,Aug,26,Melbourne,GWS Giants,102,57,1.45,2.68
11,18,Aug,26,Brisbane,West Coast,72,98,2.2,1.66
12,18,Aug,25,Carlton,Adelaide,61,165,5.25,1.14
13,18,Aug,25,Sydney,Hawthorn,74,83,2.04,1.8


In [23]:
# Convert each side's odds into the win chance they imply (1 / odds), stored as a fraction
afl_results_2009_to_2018_results['HomeOddsPercent'] = 1 / afl_results_2009_to_2018_results['Home Odds']
afl_results_2009_to_2018_results['AwayOddsPercent'] = 1 / afl_results_2009_to_2018_results['Away Odds']
afl_results_2009_to_2018_results.head()

Unnamed: 0,Year,Month,Day,HomeTeam,AwayTeam,Home Score,Away Score,Home Odds,Away Odds,HomeOddsPercent,AwayOddsPercent
9,18,Aug,26,St Kilda,North Melbourne,94,117,3.15,1.35,0.31746,0.740741
10,18,Aug,26,Melbourne,GWS Giants,102,57,1.45,2.68,0.689655,0.373134
11,18,Aug,26,Brisbane,West Coast,72,98,2.2,1.66,0.454545,0.60241
12,18,Aug,25,Carlton,Adelaide,61,165,5.25,1.14,0.190476,0.877193
13,18,Aug,25,Sydney,Hawthorn,74,83,2.04,1.8,0.490196,0.555556


In [24]:
# Add the actual winner to the test data: 'Home' when the home score is at least the away score,
# otherwise 'Away'.
# NOTE(review): a drawn game (equal scores) is therefore labelled 'Home' - confirm that this is
# the intended tipping rule.
home_not_beaten = afl_results_2009_to_2018_results['Home Score'].ge(afl_results_2009_to_2018_results['Away Score'])
afl_results_2009_to_2018_results['ActualWinner'] = home_not_beaten.map({True: 'Home', False: 'Away'})
afl_results_2009_to_2018_results.head()

Unnamed: 0,Year,Month,Day,HomeTeam,AwayTeam,Home Score,Away Score,Home Odds,Away Odds,HomeOddsPercent,AwayOddsPercent,ActualWinner
9,18,Aug,26,St Kilda,North Melbourne,94,117,3.15,1.35,0.31746,0.740741,Away
10,18,Aug,26,Melbourne,GWS Giants,102,57,1.45,2.68,0.689655,0.373134,Home
11,18,Aug,26,Brisbane,West Coast,72,98,2.2,1.66,0.454545,0.60241,Away
12,18,Aug,25,Carlton,Adelaide,61,165,5.25,1.14,0.190476,0.877193,Away
13,18,Aug,25,Sydney,Hawthorn,74,83,2.04,1.8,0.490196,0.555556,Away


In [25]:
# Team names as spelled in the 2009-2018 data, sorted for comparison with the model's team names
np.sort(afl_results_2009_to_2018_results['HomeTeam'].unique())

array(['Adelaide', 'Brisbane', 'Carlton', 'Collingwood', 'Essendon',
       'Fremantle', 'GWS Giants', 'Geelong', 'Gold Coast', 'Hawthorn',
       'Melbourne', 'North Melbourne', 'Port Adelaide', 'Richmond',
       'St Kilda', 'Sydney', 'West Coast', 'Western Bulldogs'],
      dtype=object)

In [26]:
# Team names as spelled in the 2019 data the model was fitted on, sorted the same way
np.sort(afl_tipping_model_data['team'].unique())

array(['Adelaide Crows', 'Brisbane Lions', 'Carlton', 'Collingwood',
       'Essendon', 'Fremantle', 'GWS Giants', 'Geelong Cats',
       'Gold Coast Suns', 'Hawthorn', 'Melbourne', 'North Melbourne',
       'Port Adelaide', 'Richmond', 'St Kilda', 'Sydney Swans',
       'West Coast Eagles', 'Western Bulldogs'], dtype=object)

In [27]:
# Baseline predictor: always tip the home team
afl_results_2009_to_2018_results = afl_results_2009_to_2018_results.assign(PredictedWinnerHomeGroundAdv='Home')
afl_results_2009_to_2018_results.head()

Unnamed: 0,Year,Month,Day,HomeTeam,AwayTeam,Home Score,Away Score,Home Odds,Away Odds,HomeOddsPercent,AwayOddsPercent,ActualWinner,PredictedWinnerHomeGroundAdv
9,18,Aug,26,St Kilda,North Melbourne,94,117,3.15,1.35,0.31746,0.740741,Away,Home
10,18,Aug,26,Melbourne,GWS Giants,102,57,1.45,2.68,0.689655,0.373134,Home,Home
11,18,Aug,26,Brisbane,West Coast,72,98,2.2,1.66,0.454545,0.60241,Away,Home
12,18,Aug,25,Carlton,Adelaide,61,165,5.25,1.14,0.190476,0.877193,Away,Home
13,18,Aug,25,Sydney,Hawthorn,74,83,2.04,1.8,0.490196,0.555556,Away,Home


In [28]:
# The 2009-2018 file uses shorter club names than the 2019 data the model was fitted on;
# replace each one (exact matches only) with the spelling the model knows
club_name_fixes = {
    'Adelaide': 'Adelaide Crows',
    'Brisbane': 'Brisbane Lions',
    'Geelong': 'Geelong Cats',
    'Gold Coast': 'Gold Coast Suns',
    'Sydney': 'Sydney Swans',
    'West Coast': 'West Coast Eagles',
}
afl_results_2009_to_2018_results = afl_results_2009_to_2018_results.replace(club_name_fixes)

In [29]:
# Model win chances for every game in the test data.
# simulate_match depends only on the (home, away) pairing, so each distinct pairing is simulated
# once and both probabilities are read from that single matrix, rather than simulating every game twice.
fixture_pairs = list(zip(afl_results_2009_to_2018_results['HomeTeam'], afl_results_2009_to_2018_results['AwayTeam']))

pair_win_chances = {}
for home_name, away_name in set(fixture_pairs):
    score_matrix = simulate_match(afl_2019_poisson_model, home_name, away_name)
    pair_win_chances[(home_name, away_name)] = (
        np.sum(np.tril(score_matrix, -1)),  # below the diagonal: home side scores more
        np.sum(np.triu(score_matrix, 1)),   # above the diagonal: away side scores more
    )

afl_results_2009_to_2018_results['HomeOddsPercentPred'] = [pair_win_chances[pair][0] for pair in fixture_pairs]
afl_results_2009_to_2018_results['AwayOddsPercentPred'] = [pair_win_chances[pair][1] for pair in fixture_pairs]
afl_results_2009_to_2018_results.head()

Unnamed: 0,Year,Month,Day,HomeTeam,AwayTeam,Home Score,Away Score,Home Odds,Away Odds,HomeOddsPercent,AwayOddsPercent,ActualWinner,PredictedWinnerHomeGroundAdv,HomeOddsPercentPred,AwayOddsPercentPred
9,18,Aug,26,St Kilda,North Melbourne,94,117,3.15,1.35,0.31746,0.740741,Away,Home,0.123568,0.860111
10,18,Aug,26,Melbourne,GWS Giants,102,57,1.45,2.68,0.689655,0.373134,Home,Home,0.031112,0.962931
11,18,Aug,26,Brisbane Lions,West Coast Eagles,72,98,2.2,1.66,0.454545,0.60241,Away,Home,0.608061,0.362981
12,18,Aug,25,Carlton,Adelaide Crows,61,165,5.25,1.14,0.190476,0.877193,Away,Home,0.192628,0.784898
13,18,Aug,25,Sydney Swans,Hawthorn,74,83,2.04,1.8,0.490196,0.555556,Away,Home,0.294868,0.676104


In [30]:
# The model tips 'Home' when its home win chance is at least its away win chance, otherwise 'Away'
model_favours_home = afl_results_2009_to_2018_results['HomeOddsPercentPred'].ge(afl_results_2009_to_2018_results['AwayOddsPercentPred'])
afl_results_2009_to_2018_results['PredictedWinnerModel'] = model_favours_home.map({True: 'Home', False: 'Away'})
afl_results_2009_to_2018_results.head()

Unnamed: 0,Year,Month,Day,HomeTeam,AwayTeam,Home Score,Away Score,Home Odds,Away Odds,HomeOddsPercent,AwayOddsPercent,ActualWinner,PredictedWinnerHomeGroundAdv,HomeOddsPercentPred,AwayOddsPercentPred,PredictedWinnerModel
9,18,Aug,26,St Kilda,North Melbourne,94,117,3.15,1.35,0.31746,0.740741,Away,Home,0.123568,0.860111,Away
10,18,Aug,26,Melbourne,GWS Giants,102,57,1.45,2.68,0.689655,0.373134,Home,Home,0.031112,0.962931,Away
11,18,Aug,26,Brisbane Lions,West Coast Eagles,72,98,2.2,1.66,0.454545,0.60241,Away,Home,0.608061,0.362981,Home
12,18,Aug,25,Carlton,Adelaide Crows,61,165,5.25,1.14,0.190476,0.877193,Away,Home,0.192628,0.784898,Away
13,18,Aug,25,Sydney Swans,Hawthorn,74,83,2.04,1.8,0.490196,0.555556,Away,Home,0.294868,0.676104,Away


In [31]:
# Mark each tip 'Yes' when it matches the actual winner and 'No' otherwise,
# first for the Poisson model and then for the always-home baseline
correct_labels = {True: 'Yes', False: 'No'}
model_tip_correct = afl_results_2009_to_2018_results['PredictedWinnerModel'].eq(afl_results_2009_to_2018_results['ActualWinner'])
afl_results_2009_to_2018_results['PredictionCorrect'] = model_tip_correct.map(correct_labels)
home_tip_correct = afl_results_2009_to_2018_results['PredictedWinnerHomeGroundAdv'].eq(afl_results_2009_to_2018_results['ActualWinner'])
afl_results_2009_to_2018_results['PredictionCorrectHomeGround'] = home_tip_correct.map(correct_labels)
afl_results_2009_to_2018_results.head()

Unnamed: 0,Year,Month,Day,HomeTeam,AwayTeam,Home Score,Away Score,Home Odds,Away Odds,HomeOddsPercent,AwayOddsPercent,ActualWinner,PredictedWinnerHomeGroundAdv,HomeOddsPercentPred,AwayOddsPercentPred,PredictedWinnerModel,PredictionCorrect,PredictionCorrectHomeGround
9,18,Aug,26,St Kilda,North Melbourne,94,117,3.15,1.35,0.31746,0.740741,Away,Home,0.123568,0.860111,Away,Yes,No
10,18,Aug,26,Melbourne,GWS Giants,102,57,1.45,2.68,0.689655,0.373134,Home,Home,0.031112,0.962931,Away,No,Yes
11,18,Aug,26,Brisbane Lions,West Coast Eagles,72,98,2.2,1.66,0.454545,0.60241,Away,Home,0.608061,0.362981,Home,No,No
12,18,Aug,25,Carlton,Adelaide Crows,61,165,5.25,1.14,0.190476,0.877193,Away,Home,0.192628,0.784898,Away,Yes,No
13,18,Aug,25,Sydney Swans,Hawthorn,74,83,2.04,1.8,0.490196,0.555556,Away,Home,0.294868,0.676104,Away,Yes,No


In [32]:
# Odds-based predictor: tip 'Home' when the home side's implied chance is at least the away side's
odds_favour_home = afl_results_2009_to_2018_results['HomeOddsPercent'].ge(afl_results_2009_to_2018_results['AwayOddsPercent'])
afl_results_2009_to_2018_results['PredictedWinnerOdds'] = odds_favour_home.map({True: 'Home', False: 'Away'})

# 'Yes' when the odds-based tip matches the actual winner, 'No' otherwise
odds_tip_correct = afl_results_2009_to_2018_results['PredictedWinnerOdds'].eq(afl_results_2009_to_2018_results['ActualWinner'])
afl_results_2009_to_2018_results['PredictionCorrectOdds'] = odds_tip_correct.map({True: 'Yes', False: 'No'})

afl_results_2009_to_2018_results.head()

Unnamed: 0,Year,Month,Day,HomeTeam,AwayTeam,Home Score,Away Score,Home Odds,Away Odds,HomeOddsPercent,AwayOddsPercent,ActualWinner,PredictedWinnerHomeGroundAdv,HomeOddsPercentPred,AwayOddsPercentPred,PredictedWinnerModel,PredictionCorrect,PredictionCorrectHomeGround,PredictedWinnerOdds,PredictionCorrectOdds
9,18,Aug,26,St Kilda,North Melbourne,94,117,3.15,1.35,0.31746,0.740741,Away,Home,0.123568,0.860111,Away,Yes,No,Away,Yes
10,18,Aug,26,Melbourne,GWS Giants,102,57,1.45,2.68,0.689655,0.373134,Home,Home,0.031112,0.962931,Away,No,Yes,Home,Yes
11,18,Aug,26,Brisbane Lions,West Coast Eagles,72,98,2.2,1.66,0.454545,0.60241,Away,Home,0.608061,0.362981,Home,No,No,Away,Yes
12,18,Aug,25,Carlton,Adelaide Crows,61,165,5.25,1.14,0.190476,0.877193,Away,Home,0.192628,0.784898,Away,Yes,No,Away,Yes
13,18,Aug,25,Sydney Swans,Hawthorn,74,83,2.04,1.8,0.490196,0.555556,Away,Home,0.294868,0.676104,Away,Yes,No,Away,Yes


In [33]:
def _yearly_outcome_counts(results, outcome_column):
    """Count, for each Year, how many rows carry each value of ``outcome_column``.

    Returns a frame indexed by Year with one column per distinct value of
    ``outcome_column`` (here 'No' and 'Yes'). A Year/value combination that never
    occurs is reported as 0, so the totals computed from it stay numeric.
    """
    return results.groupby(['Year', outcome_column]).size().unstack(fill_value=0)


# Yearly counts of correct ('Yes') and incorrect ('No') tips for each of the three predictors.
# Counting rows directly avoids handing the whole frame to pivot_table as its `values` argument.
afl_results_2009_to_2018_predicted_pivot_pred = _yearly_outcome_counts(afl_results_2009_to_2018_results, 'PredictionCorrect')
afl_results_2009_to_2018_predicted_pivot_home = _yearly_outcome_counts(afl_results_2009_to_2018_results, 'PredictionCorrectHomeGround')
afl_results_2009_to_2018_predicted_pivot_odds = _yearly_outcome_counts(afl_results_2009_to_2018_results, 'PredictionCorrectOdds')

In [34]:
# Games tipped per year and the share the Poisson model got right
afl_results_2009_to_2018_predicted_pivot_pred['Total'] = afl_results_2009_to_2018_predicted_pivot_pred['No'] + afl_results_2009_to_2018_predicted_pivot_pred['Yes']
afl_results_2009_to_2018_predicted_pivot_pred['PercentageCorrectPoisson'] = afl_results_2009_to_2018_predicted_pivot_pred['Yes'] / afl_results_2009_to_2018_predicted_pivot_pred['Total']
afl_results_2009_to_2018_predicted_pivot_pred.head()

PredictionCorrect,No,Yes,Total,PercentageCorrectPoisson
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
9,43,49,92,0.532609
10,79,97,176,0.551136
11,74,113,187,0.604278
12,81,117,198,0.590909
13,79,119,198,0.60101


In [35]:
# Games tipped per year and the share the always-home baseline got right
afl_results_2009_to_2018_predicted_pivot_home['Total'] = afl_results_2009_to_2018_predicted_pivot_home['No'] + afl_results_2009_to_2018_predicted_pivot_home['Yes']
afl_results_2009_to_2018_predicted_pivot_home['PercentageCorrectHomeGround'] = afl_results_2009_to_2018_predicted_pivot_home['Yes'] / afl_results_2009_to_2018_predicted_pivot_home['Total']
afl_results_2009_to_2018_predicted_pivot_home.head()

PredictionCorrectHomeGround,No,Yes,Total,PercentageCorrectHomeGround
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
9,35,57,92,0.619565
10,69,107,176,0.607955
11,82,105,187,0.561497
12,88,110,198,0.555556
13,88,110,198,0.555556


In [36]:
# Games tipped per year and the share the odds-based tips got right
afl_results_2009_to_2018_predicted_pivot_odds['Total'] = afl_results_2009_to_2018_predicted_pivot_odds['No'] + afl_results_2009_to_2018_predicted_pivot_odds['Yes']
afl_results_2009_to_2018_predicted_pivot_odds['PercentageCorrectOdds'] = afl_results_2009_to_2018_predicted_pivot_odds['Yes'] / afl_results_2009_to_2018_predicted_pivot_odds['Total']
afl_results_2009_to_2018_predicted_pivot_odds.head()

PredictionCorrectOdds,No,Yes,Total,PercentageCorrectOdds
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
9,32,60,92,0.652174
10,67,109,176,0.619318
11,43,144,187,0.770053
12,43,155,198,0.782828
13,50,148,198,0.747475


In [37]:
# Put the three yearly accuracy tables side by side, aligned on Year
yearly_accuracy_tables = [
    afl_results_2009_to_2018_predicted_pivot_pred,
    afl_results_2009_to_2018_predicted_pivot_home,
    afl_results_2009_to_2018_predicted_pivot_odds,
]
combined_prediction_results_table = pd.concat(yearly_accuracy_tables, axis='columns')
combined_prediction_results_table.head()

Unnamed: 0_level_0,No,Yes,Total,PercentageCorrectPoisson,No,Yes,Total,PercentageCorrectHomeGround,No,Yes,Total,PercentageCorrectOdds
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
9,43,49,92,0.532609,35,57,92,0.619565,32,60,92,0.652174
10,79,97,176,0.551136,69,107,176,0.607955,67,109,176,0.619318
11,74,113,187,0.604278,82,105,187,0.561497,43,144,187,0.770053
12,81,117,198,0.590909,88,110,198,0.555556,43,155,198,0.782828
13,79,119,198,0.60101,88,110,198,0.555556,50,148,198,0.747475


In [38]:
# Only the three percentage columns are compared from here on, so drop every No / Yes / Total count column
combined_prediction_results_table = combined_prediction_results_table.drop(['No', 'Yes', 'Total'], axis='columns')

In [39]:
# Preview the per-year accuracy of the three predictors side by side
combined_prediction_results_table.head()

Unnamed: 0_level_0,PercentageCorrectPoisson,PercentageCorrectHomeGround,PercentageCorrectOdds
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
9,0.532609,0.619565,0.652174
10,0.551136,0.607955,0.619318
11,0.604278,0.561497,0.770053
12,0.590909,0.555556,0.782828
13,0.60101,0.555556,0.747475


In [40]:
def _best_predictor(poisson_pct, home_ground_pct, odds_pct):
    """Return the label of the predictor with the highest share of correct tips.

    When predictors tie for the highest share, the one listed first below wins
    (odds, then Poisson, then home ground). A predictor that is strictly worse
    than another is never returned.
    """
    ranked = (
        ('OddsPred', odds_pct),
        ('Poisson2019Pred', poisson_pct),
        ('HomeGroundPred', home_ground_pct),
    )
    # max() returns the first of several equal maxima, which gives the tie-break order above
    return max(ranked, key=lambda labelled: labelled[1])[0]


# Pick the most accurate predictor for each year
combined_prediction_results_table['BestPredictor'] = [
    _best_predictor(poisson_pct, home_ground_pct, odds_pct)
    for (poisson_pct, home_ground_pct, odds_pct)
    in zip(combined_prediction_results_table['PercentageCorrectPoisson'],
           combined_prediction_results_table['PercentageCorrectHomeGround'],
           combined_prediction_results_table['PercentageCorrectOdds'])
]
display(combined_prediction_results_table)

Unnamed: 0_level_0,PercentageCorrectPoisson,PercentageCorrectHomeGround,PercentageCorrectOdds,BestPredictor
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
9,0.532609,0.619565,0.652174,OddsPred
10,0.551136,0.607955,0.619318,OddsPred
11,0.604278,0.561497,0.770053,OddsPred
12,0.590909,0.555556,0.782828,OddsPred
13,0.60101,0.555556,0.747475,OddsPred
14,0.565657,0.565657,0.712121,OddsPred
15,0.619289,0.538071,0.71066,OddsPred
16,0.606061,0.606061,0.727273,OddsPred
17,0.621212,0.59596,0.661616,OddsPred
18,0.60101,0.545455,0.70202,OddsPred


### Discussion of results
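- Based on the above, the best way to predict the result of a footy match is to just follow the odds: the odds-based tips were the most accurate of the three predictors in every season from 2009 to 2018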
- Ways to improve the model
- Currently only using the 2019 results as training data
- Better way may be to use the 2009 to 2018 results as training data and test on the 2019 data
- Will try that next
- ALternatively could try to 