# 0005.0003 Modeling Strategy - Betting, Expected Values

In [1]:
import pathlib
import os
import sys

%matplotlib inline
import matplotlib.pyplot as plt # creating visualizations
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

import pandas as pd
import numpy as np # basic math and random numbers

import torch # package for building functions with learnable parameters
import torch.nn as nn # prebuilt functions specific to neural networks
from torch.autograd import Variable # storing data while learning

from scipy import optimize
from sklearn.metrics import mean_squared_error

# Load the IPython "autoreload" extension
%load_ext autoreload
# mode 1: always reload modules marked with "%aimport" before running a cell
%autoreload 1
# add the 'src' directory to path to import modules
src_dir = pathlib.Path().cwd().resolve().parent / 'src'
#src_dir = os.path.join(os.getcwd(), os.pardir, 'src')
sys.path.append(src_dir)
# import my class code from the source
# %aimport src-dir.filename

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


DATA_PATH = pathlib.Path().cwd().resolve().parent / 'data' / 'soccer' / 'raw' / 'United Kingdom' / 'english premier league' / '2000-2001' / 'football-data-co-uk' / 'season-data' / '2000-2001.csv' 
print(DATA_PATH)

/media/david/5C14F53A14F517AA/code/ana_py37/projects/soccer-predictions/data/soccer/raw/United Kingdom/english premier league/2000-2001/football-data-co-uk/season-data/2000-2001.csv


In [2]:
# Read the season CSV at DATA_PATH into a DataFrame
df = pd.read_csv(filepath_or_buffer=DATA_PATH)
# Preview the first five rows
df.head(n=5)

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Attendance,Referee,HS,AS,HST,AST,HHW,AHW,HC,AC,HF,AF,HO,AO,HY,AY,HR,AR,HBP,ABP,GBH,GBD,GBA,IWH,IWD,IWA,LBH,LBD,LBA,SBH,SBD,SBA,WHH,WHD,WHA
0,E0,19/08/00,Charlton,Man City,4,0,H,2,0,H,20043,Rob Harris,17,8,14,4,2,1,6,6,13,12,8,6,1,2,0,0,10,20,2.0,3.0,3.2,2.2,2.9,2.7,2.2,3.25,2.75,2.2,3.25,2.88,2.1,3.2,3.1
1,E0,19/08/00,Chelsea,West Ham,4,2,H,1,0,H,34914,Graham Barber,17,12,10,5,1,0,7,7,19,14,2,3,1,2,0,0,10,20,1.47,3.4,5.2,1.6,3.2,4.2,1.5,3.4,6.0,1.5,3.6,6.0,1.44,3.6,6.5
2,E0,19/08/00,Coventry,Middlesbrough,1,3,A,1,1,D,20624,Barry Knight,6,16,3,9,0,1,8,4,15,21,1,3,5,3,1,0,75,30,2.15,3.0,3.0,2.2,2.9,2.7,2.25,3.2,2.75,2.3,3.2,2.75,2.3,3.2,2.62
3,E0,19/08/00,Derby,Southampton,2,2,D,1,2,A,27223,Andy D'Urso,6,13,4,6,0,0,5,8,11,13,0,2,1,1,0,0,10,10,2.0,3.1,3.2,1.8,3.0,3.5,2.2,3.25,2.75,2.05,3.2,3.2,2.0,3.2,3.2
4,E0,19/08/00,Leeds,Everton,2,0,H,2,0,H,40010,Dermot Gallagher,17,12,8,6,0,0,6,4,21,20,6,1,1,3,0,0,10,30,1.65,3.3,4.3,1.55,3.3,4.5,1.55,3.5,5.0,1.57,3.6,5.0,1.61,3.5,4.5


In [3]:
# Keep the match identifiers, the full-time goals/result and the LB*/WH* columns
# (the WH* columns are used as decimal odds in the EV cells below).
keep_cols = ['Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR',
             'LBH', 'LBD', 'LBA', 'WHH', 'WHD', 'WHA']
# .copy() gives df its own data, so columns added to it later are not written
# to a slice of the full frame (avoids pandas' SettingWithCopyWarning).
df = df[keep_cols].copy()
df.describe()

Unnamed: 0,FTHG,FTAG,LBH,LBD,LBA,WHH,WHD,WHA
count,380.0,380.0,325.0,325.0,325.0,380.0,380.0,380.0
mean,1.544737,1.065789,2.190277,3.415569,3.967354,2.182237,3.403079,4.009053
std,1.285436,1.02907,0.888833,0.347241,2.086878,0.91995,0.378054,2.13942
min,0.0,0.0,1.17,2.87,1.36,1.16,2.9,1.33
25%,1.0,0.0,1.57,3.2,2.4,1.57,3.2,2.5
50%,1.0,1.0,1.91,3.25,3.25,1.9,3.3,3.4
75%,2.0,2.0,2.5,3.5,5.0,2.5,3.5,5.0
max,6.0,4.0,7.0,5.5,12.0,8.0,5.5,12.0


## Betting

#### Decimal Odds:

Take decimal odds of 2.5: stake \$1 and, if the bet wins, \$2.50 is returned - the original \$1 stake plus a profit of \$1.50

If the bet loses, the \$1 stake is lost

#### Overround:

If home win, draw, and away win have odds of 2, 3.25, 3.4, the overround will be 
$$(\frac{1}{2} + \frac{1}{3.25} + \frac{1}{3.4}) \times 100\% = 110.2\%$$

The more outcomes an event has, the larger the overround tends to be, because there is more uncertainty in the eventual outcome

#### Expected Value:

Make a bet on Man City to win a game at decimal odds of 3.5 - `o`

We believe the true chance of a win is 0.25 (25%) - `p`

We stake $1 - `s` (so the formulas below are per unit staked)

$$Expected\; Value = p \times o - 1$$

$$EV = 0.25 \times 3.5 -1 = -0.125$$


#### Return

$$Expected\; Return = p \times o$$ 

$$Expected\; Return = 0.25 \times 3.5 = 0.875$$

That is, after betting \$1 we expect to get back \$0.875 on average

#### Fair Odds

fair odds - $f = \frac{1}{p}$

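Where $o > f$, the expected return $R = p \times o$ is greater than 1, so we should place the bet. In the Man City example $f = \frac{1}{0.25} = 4 > o = 3.5$, so we would not bet.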


## Simulated EV, and Returns


We will simulate some probabilities as though we have generated them from a model, and then simulate some bets

In [4]:
# Simulate Some Model Outputs
df['p_hwin'] = np.random.uniform(0.1, 0.4, size=len(df))
df['p_awin'] = np.random.uniform(0.05, 0.4, size=len(df))
df['p_draw'] = 1 - (df['p_hwin'] + df['p_awin'])
df.head()

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,LBH,LBD,LBA,WHH,WHD,WHA,p_hwin,p_awin,p_draw
0,19/08/00,Charlton,Man City,4,0,H,2.2,3.25,2.75,2.1,3.2,3.1,0.236714,0.117019,0.646267
1,19/08/00,Chelsea,West Ham,4,2,H,1.5,3.4,6.0,1.44,3.6,6.5,0.217062,0.24444,0.538499
2,19/08/00,Coventry,Middlesbrough,1,3,A,2.25,3.2,2.75,2.3,3.2,2.62,0.120943,0.069644,0.809413
3,19/08/00,Derby,Southampton,2,2,D,2.2,3.25,2.75,2.0,3.2,3.2,0.291896,0.062182,0.645922
4,19/08/00,Leeds,Everton,2,0,H,1.55,3.5,5.0,1.61,3.5,4.5,0.233271,0.109356,0.657372


In [5]:
# calculate the EVs
df['EV_hwin'] = (df['p_hwin'] * df['WHH']) - 1
df['EV_awin'] = (df['p_awin'] * df['WHA']) - 1
df['EV_draw'] = (df['p_draw'] * df['WHD']) - 1
df.head()

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,LBH,LBD,LBA,WHH,WHD,WHA,p_hwin,p_awin,p_draw,EV_hwin,EV_awin,EV_draw
0,19/08/00,Charlton,Man City,4,0,H,2.2,3.25,2.75,2.1,3.2,3.1,0.236714,0.117019,0.646267,-0.5029,-0.637242,1.068054
1,19/08/00,Chelsea,West Ham,4,2,H,1.5,3.4,6.0,1.44,3.6,6.5,0.217062,0.24444,0.538499,-0.687431,0.588858,0.938595
2,19/08/00,Coventry,Middlesbrough,1,3,A,2.25,3.2,2.75,2.3,3.2,2.62,0.120943,0.069644,0.809413,-0.721832,-0.817533,1.590122
3,19/08/00,Derby,Southampton,2,2,D,2.2,3.25,2.75,2.0,3.2,3.2,0.291896,0.062182,0.645922,-0.416208,-0.801016,1.06695
4,19/08/00,Leeds,Everton,2,0,H,1.55,3.5,5.0,1.61,3.5,4.5,0.233271,0.109356,0.657372,-0.624433,-0.507896,1.300802


In [6]:
# Bet on positive EV, and calculate profits/losses
df1 = df.copy(deep=True)
df1 = df1[['HomeTeam', 'AwayTeam', 'FTR', 'WHH', 'WHD', 'WHA', 'EV_hwin', 'EV_awin', 'EV_draw',
          'p_hwin', 'p_awin', 'p_draw']]
EV_threshold = 0.0

for bet in ['hwin', 'draw', 'awin']:
    betl = bet[0].capitalize()
    bets = (df1['EV_' + bet] > EV_threshold)
    df1.loc[bets, 'PROFIT_' + betl] = np.where(df1.loc[bets, 'FTR'] == betl, df1.loc[bets,'WH'+ betl]-1, -1)
    
df1.head(20)

Unnamed: 0,HomeTeam,AwayTeam,FTR,WHH,WHD,WHA,EV_hwin,EV_awin,EV_draw,p_hwin,p_awin,p_draw,PROFIT_H,PROFIT_D,PROFIT_A
0,Charlton,Man City,H,2.1,3.2,3.1,-0.5029,-0.637242,1.068054,0.236714,0.117019,0.646267,,-1.0,
1,Chelsea,West Ham,H,1.44,3.6,6.5,-0.687431,0.588858,0.938595,0.217062,0.24444,0.538499,,-1.0,-1.0
2,Coventry,Middlesbrough,A,2.3,3.2,2.62,-0.721832,-0.817533,1.590122,0.120943,0.069644,0.809413,,-1.0,
3,Derby,Southampton,D,2.0,3.2,3.2,-0.416208,-0.801016,1.06695,0.291896,0.062182,0.645922,,2.2,
4,Leeds,Everton,H,1.61,3.5,4.5,-0.624433,-0.507896,1.300802,0.233271,0.109356,0.657372,,-1.0,
5,Leicester,Aston Villa,D,2.4,3.25,2.5,-0.367821,-0.457287,0.688396,0.263408,0.217085,0.519507,,2.25,
6,Liverpool,Bradford,H,1.33,4.0,8.0,-0.65581,0.046468,1.441609,0.258789,0.130808,0.610402,,-1.0,-1.0
7,Sunderland,Arsenal,H,3.75,3.0,1.9,-0.107461,-0.62397,0.692237,0.23801,0.197911,0.564079,,-1.0,
8,Tottenham,Ipswich,H,1.44,3.6,6.5,-0.494695,0.282528,0.626414,0.350906,0.197312,0.451782,,-1.0,-1.0
9,Man United,Newcastle,H,1.4,3.75,7.0,-0.530965,1.411274,0.201903,0.335025,0.344468,0.320508,,-1.0,-1.0


In [7]:
# Number of bets placed per outcome: PROFIT_* is NaN where no bet was made,
# so counting the non-null entries counts the bets
profit_cols = ['PROFIT_H', 'PROFIT_D', 'PROFIT_A']
n_bets = df1.loc[:, profit_cols].notna().sum()
n_bets

PROFIT_H     23
PROFIT_D    359
PROFIT_A    119
dtype: int64

In [8]:
# Total profit/loss per outcome (NaN rows, i.e. no bet, are skipped by sum)
profit_cols = ['PROFIT_H', 'PROFIT_D', 'PROFIT_A']
rets = df1.loc[:, profit_cols].sum(axis=0)
rets

PROFIT_H     1.85
PROFIT_D   -42.00
PROFIT_A   -38.29
dtype: float64

In [9]:
# Overall profit/loss: sum each PROFIT_* column, then add the three totals
profit_cols = ['PROFIT_H', 'PROFIT_D', 'PROFIT_A']
per_outcome = df1.loc[:, profit_cols].sum(axis=0)
ret = per_outcome.sum()
ret

-78.44

## References

https://www.amazon.ca/How-Find-Black-Coal-Cellar/dp/1843440679