# Bayesian Random Intercept Model

In [1]:
import sys

sys.path.append("../../")

import penaltyblog as pb

## Get data from football-data.co.uk

In [2]:
# Build a football-data.co.uk scraper for the 2019-2020 English Premier League
# season and download its fixtures.
fb = pb.scrapers.FootballData("ENG Premier League", "2019-2020")
df = fb.get_fixtures()

# Preview the first rows; the columns used below are goals_home, goals_away,
# team_home, team_away and date.
df.head()

Unnamed: 0_level_0,date,datetime,season,competition,div,time,team_home,team_away,fthg,ftag,...,b365_cahh,b365_caha,pcahh,pcaha,max_cahh,max_caha,avg_cahh,avg_caha,goals_home,goals_away
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1565308800---liverpool---norwich,2019-08-09,2019-08-09 20:00:00,2019-2020,ENG Premier League,E0,20:00,Liverpool,Norwich,4,1,...,1.91,1.99,1.94,1.98,1.99,2.07,1.9,1.99,4,1
1565395200---bournemouth---sheffield_united,2019-08-10,2019-08-10 15:00:00,2019-2020,ENG Premier League,E0,15:00,Bournemouth,Sheffield United,1,1,...,1.95,1.95,1.98,1.95,2.0,1.96,1.96,1.92,1,1
1565395200---burnley---southampton,2019-08-10,2019-08-10 15:00:00,2019-2020,ENG Premier League,E0,15:00,Burnley,Southampton,3,0,...,1.87,2.03,1.89,2.03,1.9,2.07,1.86,2.02,3,0
1565395200---crystal_palace---everton,2019-08-10,2019-08-10 15:00:00,2019-2020,ENG Premier League,E0,15:00,Crystal Palace,Everton,0,0,...,1.82,2.08,1.97,1.96,2.03,2.08,1.96,1.93,0,0
1565395200---tottenham---aston_villa,2019-08-10,2019-08-10 17:30:00,2019-2020,ENG Premier League,E0,17:30,Tottenham,Aston Villa,3,1,...,2.1,1.7,2.18,1.77,2.21,1.87,2.08,1.8,3,1


## Train the Model

In [3]:
# Build the Bayesian random intercept goal model from the observed home/away
# goals and the corresponding home/away team names.
clf = pb.models.BayesianRandomInterceptGoalModel(
    df["goals_home"], df["goals_away"], df["team_home"], df["team_away"]
)
# Fit the model. Per the cmdstanpy log below, this compiles a Stan program and
# runs four chains, so expect it to take several seconds.
clf.fit()

20:21:36 - cmdstanpy - INFO - compiling stan file /private/var/folders/qg/1pn4sbxj7h91pv3skp6h118c0000gn/T/tmpownlcbuh.stan to exe file /private/var/folders/qg/1pn4sbxj7h91pv3skp6h118c0000gn/T/tmpownlcbuh
20:21:42 - cmdstanpy - INFO - compiled model executable: /private/var/folders/qg/1pn4sbxj7h91pv3skp6h118c0000gn/T/tmpownlcbuh
20:21:43 - cmdstanpy - INFO - CmdStan start processing


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

20:21:56 - cmdstanpy - INFO - CmdStan done processing.





Module: Penaltyblog

Model: Bayesian Hierarchical Random Intercept (Stan)

Number of parameters: 42
Team                 Attack               Defence              Intercept           
--------------------------------------------------------------------------------
Arsenal              0.053                -0.039               0.057               
Aston Villa          -0.075               0.2                  -0.078              
Bournemouth          -0.087               0.175                -0.089              
Brighton             -0.102               0.034                -0.102              
Burnley              -0.062               -0.02                -0.06               
Chelsea              0.153                0.052                0.155               
Crystal Palace       -0.197               -0.028               -0.195              
Everton              -0.05                0.064                -0.048              
Leicester            0.137                -0.139               

## The model's parameters

In [5]:
# Return the fitted parameters as a dict; the output below includes the list of
# teams and a per-team 'attack' mapping.
clf.get_params()

{'teams': ['Arsenal',
  'Aston Villa',
  'Bournemouth',
  'Brighton',
  'Burnley',
  'Chelsea',
  'Crystal Palace',
  'Everton',
  'Leicester',
  'Liverpool',
  'Man City',
  'Man United',
  'Newcastle',
  'Norwich',
  'Sheffield United',
  'Southampton',
  'Tottenham',
  'Watford',
  'West Ham',
  'Wolves'],
 'attack': {'Arsenal': np.float64(0.053),
  'Aston Villa': np.float64(-0.075),
  'Bournemouth': np.float64(-0.087),
  'Brighton': np.float64(-0.102),
  'Burnley': np.float64(-0.062),
  'Chelsea': np.float64(0.153),
  'Crystal Palace': np.float64(-0.197),
  'Everton': np.float64(-0.05),
  'Leicester': np.float64(0.137),
  'Liverpool': np.float64(0.248),
  'Man City': np.float64(0.335),
  'Man United': np.float64(0.128),
  'Newcastle': np.float64(-0.113),
  'Norwich': np.float64(-0.257),
  'Sheffield United': np.float64(-0.107),
  'Southampton': np.float64(0.017),
  'Tottenham': np.float64(0.095),
  'Watford': np.float64(-0.132),
  'West Ham': np.float64(-0.001),
  'Wolves': np.floa

## Predict Match Outcomes

In [6]:
# Predict a single fixture. The result displays as a FootballProbabilityGrid
# with goal expectations and home win / draw / away win probabilities.
# NOTE(review): presumably the first argument is the home team and the second
# the away team -- confirm against the predict() documentation.
probs = clf.predict("Liverpool", "Wolves")
probs

Module: Penaltyblog

Class: FootballProbabilityGrid

Home Goal Expectation: 2.1864874206370515
Away Goal Expectation: 0.9940010596762124

Home Win: 0.6396642871036486
Draw: 0.1947615828647382
Away Win: 0.1655741163506108

### 1x2 Probabilities

In [7]:
# The three 1x2 probabilities as a list, ordered [home win, draw, away win].
probs.home_draw_away

[np.float64(0.6396642871036486),
 np.float64(0.1947615828647382),
 np.float64(0.1655741163506108)]

In [8]:
# Probability of a home win (same value as the first entry of home_draw_away).
probs.home_win

np.float64(0.6396642871036486)

In [9]:
# Probability of a draw (same value as the second entry of home_draw_away).
probs.draw

np.float64(0.1947615828647382)

In [10]:
# Probability of an away win (same value as the third entry of home_draw_away).
probs.away_win

np.float64(0.1655741163506108)

### Probability of Total Goals >1.5

In [11]:
# Probability that the match finishes with more than 1.5 total goals.
probs.total_goals("over", 1.5)

np.float64(0.8189676669374846)

### Probability of Asian Handicap 1.5

In [12]:
# Asian handicap probability for the home side on the 1.5 line.
# NOTE(review): confirm the sign convention -- i.e. whether 1.5 means the home
# side gives or receives 1.5 goals -- against the asian_handicap() docs.
probs.asian_handicap("home", 1.5)

np.float64(0.41106450503541314)

### Probability of both teams scoring

In [13]:
# Probability that both sides score at least one goal.
probs.both_teams_to_score

np.float64(0.5489221455689794)

## Train the model with more recent data weighted to be more important

In [14]:
# Compute per-match weights from the match dates.
# NOTE(review): 0.001 is presumably the Dixon-Coles time-decay rate (xi), with
# larger values down-weighting older matches more strongly -- confirm against
# the dixon_coles_weights() documentation.
weights = pb.models.dixon_coles_weights(df["date"], 0.001)

# Rebuild the model with the weights passed as the fifth argument, then refit.
# This rebinds `clf`, so the unweighted model fitted earlier is no longer
# reachable under that name.
clf = pb.models.BayesianRandomInterceptGoalModel(
    df["goals_home"], df["goals_away"], df["team_home"], df["team_away"], weights
)
clf.fit()

20:23:38 - cmdstanpy - INFO - compiling stan file /private/var/folders/qg/1pn4sbxj7h91pv3skp6h118c0000gn/T/tmpd3eh9t2b.stan to exe file /private/var/folders/qg/1pn4sbxj7h91pv3skp6h118c0000gn/T/tmpd3eh9t2b
20:23:44 - cmdstanpy - INFO - compiled model executable: /private/var/folders/qg/1pn4sbxj7h91pv3skp6h118c0000gn/T/tmpd3eh9t2b
20:23:44 - cmdstanpy - INFO - CmdStan start processing


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

20:23:56 - cmdstanpy - INFO - CmdStan done processing.





Module: Penaltyblog

Model: Bayesian Hierarchical Random Intercept (Stan)

Number of parameters: 42
Team                 Attack               Defence              Intercept           
--------------------------------------------------------------------------------
Arsenal              0.062                -0.047               0.057               
Aston Villa          -0.086               0.178                -0.083              
Bournemouth          -0.08                0.175                -0.083              
Brighton             -0.106               0.033                -0.102              
Burnley              -0.063               -0.03                -0.061              
Chelsea              0.154                0.054                0.156               
Crystal Palace       -0.195               -0.01                -0.195              
Everton              -0.049               0.054                -0.055              
Leicester            0.124                -0.112               