# 00X.00X XXX - Stepping through Season Predictions

In [1]:
import pathlib
import sys
import joblib

import pandas as pd
import numpy as np
import missingno as msno

# import scipy.stats as stats
# import statsmodels.api as sm
# import statsmodels.formula.api as smf
# from scipy.stats import poisson, nbinom


import matplotlib.pyplot as plt
# import matplotlib.ticker as ticker
# import seaborn as sns

%matplotlib inline

# Load the "autoreload" extension
%load_ext autoreload
# always reload all modules
%autoreload 2
# Put the project root (the parent of the current working directory) on sys.path so that
# the `src` package imported below can be resolved.
PROJECT_DIR = pathlib.Path.cwd().resolve().parent
# Guard the append so that re-running this cell does not pile up duplicate sys.path entries.
if str(PROJECT_DIR) not in sys.path:
    sys.path.append(str(PROJECT_DIR))

from src.visualization.visualize import get_model_diagnosis
from src.visualization.visualize import extend_cols
from src.data.transform_long_to_wide import run_transform_ts_to_supervised
from src.data.transformed_to_all_stacked import transformed_to_all_stacked
from src.data.stacked_to_ts_featured import clean_df
from src.models.metrics import insert_rps
from src.betting.simulate import simulate_betting

# Raise pandas' display limits so the wide match tables are not truncated when rendered.
pd.options.display.max_columns = 500
pd.options.display.width = 1000

# Root of the sample data and its numbered sub-directories.
SAMPLE_DIR = PROJECT_DIR.joinpath('data', 'sample')
SAMP_STACKED_DIR = SAMPLE_DIR.joinpath('01-stacked-seasons')
SAMP_FEATURED_DIR = SAMPLE_DIR.joinpath('02-featured')
SAMP_TRANSFORMED_DIR = SAMPLE_DIR.joinpath('03-transformed')

# The same file name is used under germany/bundesliga in each of the three directories.
fn = '2007-2008__2017-2018.csv'
SAMP_STACKED_FP = SAMP_STACKED_DIR.joinpath('germany', 'bundesliga', fn)
SAMP_FEATURED_FP = SAMP_FEATURED_DIR.joinpath('germany', 'bundesliga', fn)
SAMP_TRANSFORMED_FP = SAMP_TRANSFORMED_DIR.joinpath('germany', 'bundesliga', fn)

# Serialized model artefacts.
SAMP_MODEL_FP = SAMPLE_DIR.joinpath('models', 'optimized-model.joblib')
SAMP_CAL_FP = SAMPLE_DIR.joinpath('models', 'calibration-model.joblib')

# CSV that the read_csv cell loads into `df_orig`.
SAVE_FP = SAMPLE_DIR.joinpath('check-output', 'test.csv')

In [2]:
# Load the CSV at SAVE_FP with the default integer index, parsing 'date' as datetimes.
df_orig = pd.read_csv(
    SAVE_FP,
    parse_dates=['date'],
)
# Preview the first five rows.
df_orig.head(5)

Unnamed: 0,nation,league,season,h,a,date,h_ftGoals,a_ftGoals,result,a_corners,a_fouls,a_htGoals,a_impliedDraw,a_impliedLose,a_impliedWin,a_poissDraw,a_poissLose,a_poissWin,a_redCards,a_shots,a_shotsOnTarget,a_yellowCards,awinOdds,awinOddsBwa,awinOddsMax,awinOddsMean,base_awin_prob,base_draw_prob,base_hwin_prob,drawOdds,drawOddsBwa,drawOddsMax,drawOddsMean,h_corners,h_fouls,h_htGoals,h_impliedDraw,h_impliedLose,h_impliedWin,h_poissDraw,h_poissLose,h_poissWin,h_redCards,h_shots,h_shotsOnTarget,h_yellowCards,hwinOdds,hwinOddsBwa,hwinOddsMax,hwinOddsMean,implied_awin,implied_draw,implied_hwin,ordinal_result,ordinal_result_1,ordinal_result_2,ordinal_result_3,seasonPercentile,weight,1,2,3,pred_class,pred_result,model_rps,bookie_rps,baseline_rps,hwin_unit_EV,draw_unit_EV,awin_unit_EV,hwin_bet,draw_bet,awin_bet,hwin_rets,draw_rets,awin_rets,placed_bet,game_ret
0,germany,bundesliga,2016-2017,ingolstadt,rb-leipzig,2016-12-10,1.0,0.0,hwin,4.0,16.0,0.0,0.253204,0.219288,0.527508,0.154623,0.072617,0.772724,0.0,13.0,3.0,4.0,1.8,1.75,1.85,1.8,0.275,0.275,0.45,3.75,3.75,3.75,3.63,3.0,19.0,1.0,0.253204,0.527508,0.219288,0.154623,0.772724,0.072617,1.0,4.0,1.0,1.0,4.33,4.75,5.0,4.62,0.527508,0.253204,0.219288,1,1,0,0,0.405229,0.611111,0.473198,0.231347,0.295455,1,hwin,0.182407,0.443889,0.189062,1.186174,-0.16021,-0.468181,True,False,False,4.0,0.0,0.0,hwin_bet,4.0
1,germany,bundesliga,2016-2017,borussia-monchengladbach,mainz,2016-12-11,1.0,0.0,hwin,1.0,15.0,0.0,0.238367,0.570939,0.190694,0.23148,0.353203,0.415314,1.0,12.0,4.0,0.0,5.0,5.0,5.35,4.86,0.275,0.275,0.45,4.0,3.7,4.06,3.85,4.0,13.0,0.0,0.238367,0.190694,0.570939,0.23148,0.415314,0.353203,0.0,9.0,3.0,2.0,1.67,1.72,1.85,1.71,0.190694,0.238367,0.570939,1,1,0,0,0.411765,0.611111,0.666234,0.231566,0.1022,1,hwin,0.060922,0.110229,0.189062,0.13926,-0.10847,-0.503308,True,False,False,0.85,0.0,0.0,hwin_bet,0.85
2,germany,bundesliga,2016-2017,hoffenheim,dortmund,2016-12-16,2.0,2.0,draw,4.0,18.0,1.0,0.23726,0.249747,0.512994,0.227446,0.445892,0.326659,1.0,14.0,4.0,1.0,1.85,1.91,2.04,1.91,0.275,0.275,0.45,4.0,4.1,4.1,3.92,2.0,22.0,2.0,0.23726,0.512994,0.249747,0.227446,0.326659,0.445892,0.0,13.0,5.0,5.0,3.8,3.75,4.3,3.74,0.512994,0.23726,0.249747,2,0,1,0,0.415033,1.0,0.352755,0.225244,0.422001,3,awin,0.151261,0.162768,0.139062,0.319303,-0.117045,-0.193977,True,False,False,-1.0,0.0,0.0,hwin_bet,-1.0
3,germany,bundesliga,2016-2017,schalke,sc-freiburg,2016-12-17,1.0,1.0,draw,1.0,9.0,0.0,0.238367,0.570939,0.190694,0.216683,0.621707,0.161605,0.0,6.0,2.0,0.0,5.0,5.25,5.5,5.16,0.275,0.275,0.45,4.0,4.0,4.15,3.9,9.0,12.0,0.0,0.238367,0.190694,0.570939,0.216683,0.161605,0.621707,0.0,17.0,2.0,1.0,1.67,1.65,1.77,1.67,0.190694,0.238367,0.570939,2,0,1,0,0.434641,1.0,0.566038,0.243224,0.190738,1,hwin,0.17839,0.181168,0.139062,-0.054717,-0.051428,-0.01579,False,False,False,0.0,0.0,0.0,no_bet,0.0
4,germany,bundesliga,2016-2017,wolfsburg,eintracht-frankfurt,2016-12-17,1.0,0.0,hwin,2.0,18.0,0.0,0.298805,0.38247,0.318725,0.283842,0.165794,0.550365,0.0,7.0,1.0,4.0,3.0,2.95,3.1,2.95,0.275,0.275,0.45,3.2,3.3,3.32,3.21,4.0,17.0,1.0,0.298805,0.318725,0.38247,0.283842,0.550365,0.165794,0.0,15.0,3.0,3.0,2.5,2.45,2.67,2.5,0.318725,0.298805,0.38247,1,1,0,0,0.434641,0.611111,0.469726,0.210758,0.319516,1,hwin,0.191641,0.241464,0.189062,0.174315,-0.323467,-0.057427,True,False,False,1.67,0.0,0.0,hwin_bet,1.67


In [3]:
# Build the presentation table used by the cells below: "fair" odds next to the best
# available odds, the bet that was placed, the result and the profit/loss of a $1 bet.
#
# The table is derived from `df_orig` in one non-mutating chain. Nothing is assigned into
# (or renamed in place on) a column subset, so the cell cannot trigger pandas'
# SettingWithCopyWarning, leaves `df_orig` untouched and gives the same `df` on every re-run.

# Columns kept for display (after the fair-odds columns are added), in display order.
cols = ['league', 'date', 'h', 'a', 'result', 'Fair Home Win Odds', 'Fair Draw Odds', 'Fair Away Win Odds',
        'hwinOddsMax', 'drawOddsMax', 'awinOddsMax', 'placed_bet', 'game_ret']

# Raw column name -> reader-facing header.
column_labels = {
    'h': 'Home',
    'a': 'Away',
    'result': 'Result',
    'hwinOddsMax': 'Best Home Win Odds',
    'drawOddsMax': 'Best Draw Odds',
    'awinOddsMax': 'Best Away Win Odds',
    'placed_bet': 'Bet to Place',
    'game_ret': '$1 Bet Profit/Loss',
}

# Raw codes -> reader-facing labels.
# NOTE(review): ' Draw' carries a leading space in both mappings. It looks like a typo, but it
# is kept unchanged so the displayed values stay the same; confirm before normalising.
bet_labels = {'hwin_bet': 'Home Win', 'draw_bet': ' Draw', 'awin_bet': 'Away Win',
              'no_bet': 'None'}
result_labels = {'hwin': 'Home Win', 'draw': ' Draw', 'awin': 'Away Win'}

df = (
    df_orig
    # Fair odds are the reciprocals of columns '1', '2' and '3'
    # (presumably the model's home-win / draw / away-win probabilities; TODO confirm).
    .assign(**{
        'Fair Home Win Odds': 1 / df_orig['1'],
        'Fair Draw Odds': 1 / df_orig['2'],
        'Fair Away Win Odds': 1 / df_orig['3'],
    })
    .loc[:, cols]
    .rename(columns=column_labels)
)

# Series.map yields NaN for any code that is missing from the dictionaries above.
df = df.assign(**{
    'Bet to Place': df['Bet to Place'].map(bet_labels),
    'Result': df['Result'].map(result_labels),
})

# Column groups used by the per-week cells: the pre-bet view, and the columns added to it
# in the result view.
pre_bet_cols = ['league', 'date', 'Home', 'Away', 'Fair Home Win Odds', 'Fair Draw Odds', 'Fair Away Win Odds',
                'Best Home Win Odds', 'Best Draw Odds', 'Best Away Win Odds', 'Bet to Place']
post_bet_cols = ['Result', '$1 Bet Profit/Loss']

# Round the numeric columns to two decimals for display.
df = df.round(2)
df.head()

Unnamed: 0,league,date,Home,Away,Result,Fair Home Win Odds,Fair Draw Odds,Fair Away Win Odds,Best Home Win Odds,Best Draw Odds,Best Away Win Odds,Bet to Place,$1 Bet Profit/Loss
0,bundesliga,2016-12-10,ingolstadt,rb-leipzig,Home Win,2.11,4.32,3.38,5.0,3.75,1.85,Home Win,4.0
1,bundesliga,2016-12-11,borussia-monchengladbach,mainz,Home Win,1.5,4.32,9.78,1.85,4.06,5.35,Home Win,0.85
2,bundesliga,2016-12-16,hoffenheim,dortmund,Draw,2.83,4.44,2.37,4.3,4.1,2.04,Home Win,-1.0
3,bundesliga,2016-12-17,schalke,sc-freiburg,Draw,1.77,4.11,5.24,1.77,4.15,5.5,,0.0
4,bundesliga,2016-12-17,wolfsburg,eintracht-frankfurt,Home Win,2.13,4.74,3.13,2.67,3.32,3.1,Home Win,1.67


In [4]:
# Bucket the matches by week (pd.Grouper on 'date' with freq='W'). Weeks in which no game
# was played (e.g. out of season) come back as empty groups and are left out of the list.
weeks = [
    week_games
    for _week_label, week_games in df.groupby(pd.Grouper(key='date', freq='W'))
    if len(week_games) > 0
]

## Week 1

### Prediction

In [5]:
# Zero-based position in `weeks` of the week shown in this section.
n = 0
# Pre-bet view: only the columns listed in `pre_bet_cols`.
weeks[n].loc[:, pre_bet_cols]

Unnamed: 0,league,date,Home,Away,Fair Home Win Odds,Fair Draw Odds,Fair Away Win Odds,Best Home Win Odds,Best Draw Odds,Best Away Win Odds,Bet to Place
0,bundesliga,2016-12-10,ingolstadt,rb-leipzig,2.11,4.32,3.38,5.0,3.75,1.85,Home Win
1,bundesliga,2016-12-11,borussia-monchengladbach,mainz,1.5,4.32,9.78,1.85,4.06,5.35,Home Win


### Result

In [6]:
# Result view: the pre-bet columns followed by the columns in `post_bet_cols`.
weeks[n].loc[:, [*pre_bet_cols, *post_bet_cols]]

Unnamed: 0,league,date,Home,Away,Fair Home Win Odds,Fair Draw Odds,Fair Away Win Odds,Best Home Win Odds,Best Draw Odds,Best Away Win Odds,Bet to Place,Result,$1 Bet Profit/Loss
0,bundesliga,2016-12-10,ingolstadt,rb-leipzig,2.11,4.32,3.38,5.0,3.75,1.85,Home Win,Home Win,4.0
1,bundesliga,2016-12-11,borussia-monchengladbach,mainz,1.5,4.32,9.78,1.85,4.06,5.35,Home Win,Home Win,0.85


In [7]:
# Sum the selected week's profit/loss column (post_bet_cols[1]) and print it with two
# decimals, so floating-point noise in the sum never reaches the output and the format
# matches the all-weeks listing further down.
weekly_return = weeks[n][post_bet_cols[1]].sum()
print(f'Weekly Return: ${weekly_return:.2f}')

Weekly Return: $4.85


## Week 2

### Prediction

In [8]:
# Zero-based position in `weeks` of the week shown in this section.
n = 1
# Pre-bet view: only the columns listed in `pre_bet_cols`.
weeks[n].loc[:, pre_bet_cols]

Unnamed: 0,league,date,Home,Away,Fair Home Win Odds,Fair Draw Odds,Fair Away Win Odds,Best Home Win Odds,Best Draw Odds,Best Away Win Odds,Bet to Place
2,bundesliga,2016-12-16,hoffenheim,dortmund,2.83,4.44,2.37,4.3,4.1,2.04,Home Win
3,bundesliga,2016-12-17,schalke,sc-freiburg,1.77,4.11,5.24,1.77,4.15,5.5,
4,bundesliga,2016-12-17,wolfsburg,eintracht-frankfurt,2.13,4.74,3.13,2.67,3.32,3.1,Home Win
5,bundesliga,2016-12-17,sv-werder-bremen,fc-koln,1.67,4.84,5.1,2.76,3.52,2.78,Home Win
6,bundesliga,2016-12-17,augsburg,borussia-monchengladbach,1.75,3.73,6.29,3.2,3.4,2.6,Home Win
7,bundesliga,2016-12-17,rb-leipzig,hertha-berlin,1.77,4.09,5.28,1.8,4.09,6.4,Away Win
8,bundesliga,2016-12-18,leverkusen,ingolstadt,2.27,4.57,2.94,1.63,4.3,6.6,Away Win


### Result

In [9]:
# Result view: the pre-bet columns followed by the columns in `post_bet_cols`.
weeks[n].loc[:, [*pre_bet_cols, *post_bet_cols]]

Unnamed: 0,league,date,Home,Away,Fair Home Win Odds,Fair Draw Odds,Fair Away Win Odds,Best Home Win Odds,Best Draw Odds,Best Away Win Odds,Bet to Place,Result,$1 Bet Profit/Loss
2,bundesliga,2016-12-16,hoffenheim,dortmund,2.83,4.44,2.37,4.3,4.1,2.04,Home Win,Draw,-1.0
3,bundesliga,2016-12-17,schalke,sc-freiburg,1.77,4.11,5.24,1.77,4.15,5.5,,Draw,0.0
4,bundesliga,2016-12-17,wolfsburg,eintracht-frankfurt,2.13,4.74,3.13,2.67,3.32,3.1,Home Win,Home Win,1.67
5,bundesliga,2016-12-17,sv-werder-bremen,fc-koln,1.67,4.84,5.1,2.76,3.52,2.78,Home Win,Draw,-1.0
6,bundesliga,2016-12-17,augsburg,borussia-monchengladbach,1.75,3.73,6.29,3.2,3.4,2.6,Home Win,Home Win,2.2
7,bundesliga,2016-12-17,rb-leipzig,hertha-berlin,1.77,4.09,5.28,1.8,4.09,6.4,Away Win,Home Win,-1.0
8,bundesliga,2016-12-18,leverkusen,ingolstadt,2.27,4.57,2.94,1.63,4.3,6.6,Away Win,Away Win,5.6


In [10]:
# Sum the selected week's profit/loss column (post_bet_cols[1]) and print it with two
# decimals, so floating-point noise in the sum never reaches the output and the format
# matches the all-weeks listing further down.
weekly_return = weeks[n][post_bet_cols[1]].sum()
print(f'Weekly Return: ${weekly_return:.2f}')

Weekly Return: $6.47


### All Weekly Returns 

In [11]:
# Print the profit/loss total of every week, labelled with its zero-based position in `weeks`.
# `n` is deliberately NOT used as the loop variable: it is the week selector read by the
# per-week cells, and rebinding it here would change what those cells show when re-run.
for week_idx, week_games in enumerate(weeks):
    print(f'Week: {week_idx}: Weekly profit/loss: ${week_games[post_bet_cols[1]].sum():.2f}')


Week: 0: Weekly profit/loss: $4.85
Week: 1: Weekly profit/loss: $6.47
Week: 2: Weekly profit/loss: $-0.02
Week: 3: Weekly profit/loss: $-5.39
Week: 4: Weekly profit/loss: $3.67
Week: 5: Weekly profit/loss: $11.70
Week: 6: Weekly profit/loss: $4.95
Week: 7: Weekly profit/loss: $-4.80
Week: 8: Weekly profit/loss: $-6.10
Week: 9: Weekly profit/loss: $5.02
Week: 10: Weekly profit/loss: $9.18
Week: 11: Weekly profit/loss: $-1.27
Week: 12: Weekly profit/loss: $-6.50
Week: 13: Weekly profit/loss: $3.37
Week: 14: Weekly profit/loss: $7.88
Week: 15: Weekly profit/loss: $5.18
Week: 16: Weekly profit/loss: $13.83
Week: 17: Weekly profit/loss: $-2.80
Week: 18: Weekly profit/loss: $-5.13
Week: 19: Weekly profit/loss: $0.58
Week: 20: Weekly profit/loss: $-6.00
Week: 21: Weekly profit/loss: $-2.06
Week: 22: Weekly profit/loss: $0.15
Week: 23: Weekly profit/loss: $8.12
Week: 24: Weekly profit/loss: $2.93
Week: 25: Weekly profit/loss: $-1.12
Week: 26: Weekly profit/loss: $-3.70
Week: 27: Weekly profit/

### Net Sum of all weeks

In [12]:
# Total of the profit/loss column (post_bet_cols[1]) over every row of `df`.
df.loc[:, post_bet_cols[1]].sum()

48.8