# Get Previous Outcomes From Sporting Life

In [1]:
import requests
import urllib
from bs4 import BeautifulSoup
import json
import pandas as pd
import numpy as np
import pickle
from tqdm import tqdm_notebook, tqdm
import datetime
import time
import importlib
import config
importlib.reload(config)
from config import username, password, application, dbpw
import logging

import matplotlib.pyplot as plt
import seaborn as sns

import pymysql
import sqlalchemy

import xgboost as xgb
import statsmodels.api as sm

from Levenshtein import distance as levenshtein_distance

In [2]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
pd.options.mode.chained_assignment = None

# Get Data For Dates

In [4]:
past_dates = pd.date_range(start='2023-01-02', end='2023-01-12')

In [5]:
matches_data = []
goals_data = []
failed_dates = []

In [6]:
for d in tqdm(past_dates):
    try:
        yyyymmdd = d.strftime('%Y')+'-'+d.strftime('%m')+'-'+d.strftime('%d')
        dateurl = 'https://www.sportinglife.com/football/fixtures-results/'+yyyymmdd
        datejson = urllib.request.urlopen(dateurl).read()
        soup = BeautifulSoup(datejson)
        soup_find = soup.body.find(attrs={"type": "application/json"})
        soup_json = json.loads(soup_find.text)
        
        matches = soup_json.get('props', {}).get('pageProps', {}).get('matches', [])
        for m in matches:
            match_ref = m.get('match_reference', {}).get('id')
            match_date = m.get('match_date')
            match_time = m.get('match_time')

            matches_data.append(
                [
                    match_ref, match_date, match_time,
                    m.get('state'),
                    m.get('match_type'), 
                    m.get('competition', {}).get('competition_reference', {}).get('id'),
                    m.get('competition', {}).get('name'),
                    str(m.get('round')),
                    m.get('legs'),
                    m.get('leg'),
                    m.get('team_score_a', {}).get('team', {}).get('team_reference', {}).get('id'),
                    m.get('team_score_a', {}).get('team', {}).get('name'),
                    m.get('team_score_a', {}).get('team', {}).get('short_name'),
                    m.get('team_score_a', {}).get('score', [])[0].get('score'),
                    m.get('team_score_b', {}).get('team', {}).get('team_reference', {}).get('id'),
                    m.get('team_score_b', {}).get('team', {}).get('name'),
                    m.get('team_score_b', {}).get('team', {}).get('short_name'),
                    m.get('team_score_b', {}).get('score', [])[0].get('score'),
                    m.get('match_outcome', {}).get('outcome'),
                    m.get('match_outcome', {}).get('result_type'),
                    m.get('match_outcome', {}).get('winner', {}).get('team_reference', {}).get('id'),
                    m.get('match_outcome', {}).get('winner', {}).get('name'),
                    m.get('match_outcome', {}).get('winner', {}).get('short_name'),
                    m.get('half_time_score', {}).get('home'),
                    m.get('half_time_score', {}).get('away'),
                    m.get('full_time_score', {}).get('home'),
                    m.get('full_time_score', {}).get('away'),
                    m.get('clock')
                ]
            )

            homegoals = m.get('homeGoals', [])
            for g in homegoals:
                player = g.get('team_player')
                goal_id = g.get('id')
                for goal in g.get('goal', []):
                    goals_data.append(
                        [
                            match_ref, match_date, match_time,
                            player, goal_id,
                            goal.get('type'),
                            goal.get('time'),
                            goal.get('event_id'),
                            goal.get('event_time'),
                            'home'
                        ]
                    )

            awaygoals = m.get('awayGoals', [])
            for g in awaygoals:
                player = g.get('team_player')
                goal_id = g.get('id')
                for goal in g.get('goal', []):
                    goals_data.append(
                        [
                            match_ref, match_date, match_time,
                            player, goal_id,
                            goal.get('type'),
                            goal.get('time'),
                            goal.get('event_id'),
                            goal.get('event_time'),
                            'away'
                        ]
                    )
        
    except:
        failed_dates.append(d)

100%|██████████| 11/11 [00:06<00:00,  1.79it/s]


In [7]:
matches_cols = [
    'match_ref', 'match_date', 'match_time', 'state', 'match_type', 'competition_id', 'competition_name', 
    'round', 'legs', 'leg', 'team_a_id', 'team_a_name', 'team_a_short_name', 'team_a_score',
    'team_b_id', 'team_b_name', 'team_b_short_name', 'team_b_score',
    'outcome', 'result_type', 'winner_id', 'winner_name', 'winner_short_name',
    'half_time_score_home', 'half_time_score_away',
    'full_time_score_home', 'full_time_score_away', 'clock'
]
matches_df = pd.DataFrame(matches_data, columns=matches_cols)

In [8]:
goals_cols = [
    'match_ref', 'match_date', 'match_time', 'player', 'goal_id', 'type', 'time', 'event_id', 'event_time', 'side'
]
goals_df = pd.DataFrame(goals_data, columns=goals_cols)

In [9]:
def get_extra_time(t):
    time_split = t.replace("'", "").split("+")
    if len(time_split)>1:
        return int(time_split[1])
    else:
        return 0

goals_df['time_regular'] = goals_df['time'].apply(lambda x: int(x.replace("'", "").split("+")[0]))
goals_df['time_extra'] = goals_df['time'].apply(get_extra_time)

In [10]:
matches_df['year'] = matches_df['match_date'].apply(lambda x: x[:4])

In [11]:
time_cutoffs = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90]
cap_at = 90
time_features = []
for i, t in enumerate(time_cutoffs):
    goals_df[f'goals_pre_{t}m'] = ((goals_df[f'time_regular']<t) & (goals_df[f'time_regular']<=cap_at))*1
    time_features += [f'goals_pre_{t}m']
    if t > min(time_cutoffs):
        goals_df[f'goals_{time_cutoffs[i-1]}m_to_{t}m'] = (
            goals_df[f'goals_pre_{t}m'] - goals_df[f'goals_pre_{time_cutoffs[i-1]}m'])
        time_features += [f'goals_{time_cutoffs[i-1]}m_to_{t}m']
    
    goals_df[f'goals_post_{t}m'] = ((goals_df[f'time_regular']>=t) & (goals_df[f'time_regular']<=cap_at))*1
    time_features += [f'goals_post_{t}m']

# same for home and away goals
time_features_home_away = []
for i, t in enumerate(time_cutoffs):
    goals_df[f'home_goals_pre_{t}m'] = ((goals_df[f'time_regular']<t) & (goals_df[f'time_regular']<=cap_at) & (goals_df[f'side']=='home'))*1
    time_features_home_away += [f'home_goals_pre_{t}m']
    
    goals_df[f'away_goals_pre_{t}m'] = ((goals_df[f'time_regular']<t) & (goals_df[f'time_regular']<=cap_at) & (goals_df[f'side']=='away'))*1
    time_features_home_away += [f'away_goals_pre_{t}m']
    
    if t > min(time_cutoffs):
        goals_df[f'home_goals_{time_cutoffs[i-1]}m_to_{t}m'] = (
            goals_df[f'home_goals_pre_{t}m'] - goals_df[f'home_goals_pre_{time_cutoffs[i-1]}m'])
        time_features_home_away += [f'home_goals_{time_cutoffs[i-1]}m_to_{t}m']
        
        goals_df[f'away_goals_{time_cutoffs[i-1]}m_to_{t}m'] = (
            goals_df[f'away_goals_pre_{t}m'] - goals_df[f'away_goals_pre_{time_cutoffs[i-1]}m'])
        time_features_home_away += [f'away_goals_{time_cutoffs[i-1]}m_to_{t}m']
    
    goals_df[f'home_goals_post_{t}m'] = ((goals_df[f'time_regular']>=t) & (goals_df[f'time_regular']<=cap_at) & (goals_df[f'side']=='home'))*1
    time_features_home_away += [f'home_goals_post_{t}m']
    
    goals_df[f'away_goals_post_{t}m'] = ((goals_df[f'time_regular']>=t) & (goals_df[f'time_regular']<=cap_at) & (goals_df[f'side']=='away'))*1
    time_features_home_away += [f'away_goals_post_{t}m']

In [12]:
goals_features = goals_df.groupby(['match_ref', 'match_date'])[time_features+time_features_home_away].sum().reset_index()

In [13]:
model_data = matches_df.merge(goals_features, how='left', on=['match_ref', 'match_date'])

for f in time_features + time_features_home_away:
    model_data[f] = model_data[f].fillna(0)
    
for t in time_cutoffs:
    model_data[f'goal_diff_at_{t}'] = model_data[f'home_goals_pre_{t}m'] - model_data[f'away_goals_pre_{t}m']
    model_data[f'abs_goal_diff_at_{t}'] = abs(model_data[f'goal_diff_at_{t}'])

In [14]:
# check only include games with no goals data if score is 0 - 0
model_data['goals_data_num_goals'] = model_data['goals_pre_90m'] + model_data['goals_post_90m']
model_data['matches_data_num_goals'] = model_data['team_a_score'] + model_data['team_b_score']

In [15]:
model_data_no_goals_data = model_data[model_data['goals_data_num_goals'] != model_data['matches_data_num_goals']]
model_data = model_data[model_data['goals_data_num_goals'] == model_data['matches_data_num_goals']]

In [16]:
model_data = model_data[model_data['half_time_score_home'].notnull()]

In [17]:
model_data['any_goal_post_50'] = (model_data['goals_post_50m']>0)*1
model_data['any_goal_post_60'] = (model_data['goals_post_60m']>0)*1
model_data['any_goal_post_70'] = (model_data['goals_post_70m']>0)*1
model_data['any_goal_post_75'] = (model_data['goals_post_75m']>0)*1
model_data['any_goal_post_80'] = (model_data['goals_post_80m']>0)*1
model_data['any_goal_post_85'] = (model_data['goals_post_85m']>0)*1

In [18]:
model_data['total_goals'] = model_data['team_a_score'] + model_data['team_b_score']
model_data['score_diff'] = model_data['team_a_score'] - model_data['team_b_score']
model_data['score_diff_abs'] = abs(model_data['score_diff'])

In [19]:
model_data['year'] = model_data['match_date'].apply(lambda x: x[:4]).astype(int)
model_data['month'] = model_data['match_date'].apply(lambda x: x[5:7]).astype(int)

In [20]:
model_data.shape

(337, 236)

# Get Data from DB

In [21]:
connect_string = 'mysql+pymysql://root:'+dbpw+'@localhost/sl_bf_late_goals'
sql_engine = sqlalchemy.create_engine(connect_string)
testing_viable_matches = pd.read_sql('''SELECT * FROM testing_viable_matches''', con=sql_engine)
testing2_viable_matches = pd.read_sql('''SELECT * FROM testing2_viable_matches''', con=sql_engine)
testing_model_data = pd.read_sql('''SELECT * FROM testing_model_data''', con=sql_engine)
testing2_model_data = pd.read_sql('''SELECT * FROM testing2_model_data''', con=sql_engine)

In [22]:
testing_viable_matches.shape

(461, 78)

In [23]:
testing2_viable_matches.shape

(1299, 78)

In [24]:
select_cols1 = f'select v.{testing_viable_matches.columns[0]}'
for c in testing_viable_matches.columns[1:]:
    select_cols1 += f', v.{c}'
for c in testing_model_data.columns:
    if c not in testing_viable_matches.columns:
        select_cols1 += f', m.{c}'
        
select_cols2 = f'select v.{testing2_viable_matches.columns[0]}'
for c in testing2_viable_matches.columns[1:]:
    select_cols2 += f', v.{c}'
for c in testing2_model_data.columns:
    if c not in testing2_viable_matches.columns:
        select_cols2 += f', m.{c}'

In [25]:
viable_matches_with_model_data = pd.read_sql(f'''{select_cols1} FROM testing_viable_matches v LEFT JOIN testing_model_data m ON v.match_ref = m.match_ref and v.datetime_utc = m.datetime_utc''', con=sql_engine)
viable_matches_with_model_data2 = pd.read_sql(f'''{select_cols2} FROM testing2_viable_matches v LEFT JOIN testing2_model_data m ON v.match_ref = m.match_ref and v.datetime_utc = m.datetime_utc''', con=sql_engine)
viable_matches_with_model_data3 = pd.read_sql(f'''SELECT * FROM testing3_model_data_with_preds''', con=sql_engine)
viable_matches_with_model_data4 = pd.read_sql(f'''SELECT * FROM testing_live_model_data_with_preds''', con=sql_engine)

In [26]:
viable_matches_with_model_data.shape

(461, 131)

In [27]:
viable_matches_with_model_data2.shape

(1299, 277)

In [28]:
viable_matches_with_model_data3.shape

(360, 283)

In [29]:
viable_matches_with_model_data3.shape

(360, 283)

In [30]:
# add on the extra features needed for the original v1 data collected
viable_matches_with_model_data1 = viable_matches_with_model_data.merge(model_data, how='left', on='match_ref', suffixes=('', '_added_on_data'))
viable_matches_with_model_data2 = viable_matches_with_model_data2.merge(model_data, how='left', on='match_ref', suffixes=('', '_added_on_data'))
viable_matches_with_model_data3 = viable_matches_with_model_data3.merge(model_data, how='left', on='match_ref', suffixes=('', '_added_on_data'))
viable_matches_with_model_data4 = viable_matches_with_model_data4.merge(model_data, how='left', on='match_ref', suffixes=('', '_added_on_data'))

# Get models and do predictions

In [31]:
# # football models
# with open('/home/angus/projects/betting/football/models/late_goals_test_model_1.pickle', 'rb') as f:
#     test_model_1_80m = pickle.load(f)
with open('/home/angus/projects/betting/football/models/late_goals_test_models_2.pickle', 'rb') as f:
    models_dicts = pickle.load(f)

In [32]:
models_dicts

{'any_goal_post_50': {'lin_mod': <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7f86c4500c88>,
  'rf_mod': RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                         max_depth=6, max_features='auto', max_leaf_nodes=None,
                         min_impurity_decrease=0.0, min_impurity_split=None,
                         min_samples_leaf=2, min_samples_split=2,
                         min_weight_fraction_leaf=0.0, n_estimators=16,
                         n_jobs=None, oob_score=False, random_state=None,
                         verbose=0, warm_start=False),
  'xgb_mod': XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
                colsample_bynode=1, colsample_bytree=1, gamma=0,
                learning_rate=0.1, max_delta_step=0, max_depth=2,
                min_child_weight=1, missing=nan, n_estimators=4, n_jobs=1,
                nthread=None, objective='binary:logistic', random_state=0,
          

In [33]:
prediction_times = [50, 60, 70, 75, 80, 85]
model_data_1_with_preds = []
model_data_2_with_preds = []
for p in prediction_times:
    model_data_1_sub = viable_matches_with_model_data1[viable_matches_with_model_data1['next_prediction_time']==p]
    model_data_2_sub = viable_matches_with_model_data2[viable_matches_with_model_data2['next_prediction_time']==p]

    train_rc_comp = models_dicts[f'any_goal_post_{p}']['train_rc_comp']

    model_data_1_sub['competition_name_rc'] = model_data_1_sub['competition_name']
    model_data_1_sub.loc[~model_data_1_sub['competition_name'].isin(train_rc_comp['competition_name_rc']), 'competition_name_rc'] = 'Other'
    model_data_1_sub = model_data_1_sub.merge(train_rc_comp, how='left', on='competition_name_rc')

    model_data_2_sub['competition_name_rc'] = model_data_2_sub['competition_name']
    model_data_2_sub.loc[~model_data_2_sub['competition_name'].isin(train_rc_comp['competition_name_rc']), 'competition_name_rc'] = 'Other'
    model_data_2_sub = model_data_2_sub.merge(train_rc_comp, how='left', on='competition_name_rc')
    
    model_data_1_sub = model_data_1_sub[model_data_1_sub[models_dicts[f'any_goal_post_{p}']['features']].isnull().sum(axis=1)==0]
    model_data_2_sub = model_data_2_sub[model_data_2_sub[models_dicts[f'any_goal_post_{p}']['features']].isnull().sum(axis=1)==0]
    
    if len(model_data_1_sub) > 0:
        model_data_1_sub['lm_preds'] = models_dicts[f'any_goal_post_{p}']['lin_mod'].predict(sm.add_constant(model_data_1_sub[models_dicts[f'any_goal_post_{p}']['features']], has_constant='add'))
        model_data_1_sub['rf_preds'] = models_dicts[f'any_goal_post_{p}']['rf_mod'].predict_proba(model_data_1_sub[models_dicts[f'any_goal_post_{p}']['features']])[:, 1]
        model_data_1_sub['xgb_preds'] = models_dicts[f'any_goal_post_{p}']['xgb_mod'].predict_proba(model_data_1_sub[models_dicts[f'any_goal_post_{p}']['features']])[:, 1]
    
    if len(model_data_2_sub) > 0:
        model_data_2_sub['lm_preds'] = models_dicts[f'any_goal_post_{p}']['lin_mod'].predict(sm.add_constant(model_data_2_sub[models_dicts[f'any_goal_post_{p}']['features']], has_constant='add'))
        model_data_2_sub['rf_preds'] = models_dicts[f'any_goal_post_{p}']['rf_mod'].predict_proba(model_data_2_sub[models_dicts[f'any_goal_post_{p}']['features']])[:, 1]
        model_data_2_sub['xgb_preds'] = models_dicts[f'any_goal_post_{p}']['xgb_mod'].predict_proba(model_data_2_sub[models_dicts[f'any_goal_post_{p}']['features']])[:, 1]
    
    model_data_1_with_preds.append(model_data_1_sub)
    model_data_2_with_preds.append(model_data_2_sub)
    
model_data_1_with_preds = pd.concat(model_data_1_with_preds, axis=0)
model_data_2_with_preds = pd.concat(model_data_2_with_preds, axis=0)

  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)


In [34]:
model_data_1_with_preds['lm_odds_over'] = 1/model_data_1_with_preds['lm_preds']
model_data_1_with_preds['rf_odds_over'] = 1/model_data_1_with_preds['rf_preds']
model_data_1_with_preds['xgb_odds_over'] = 1/model_data_1_with_preds['xgb_preds']

In [35]:
model_data_2_with_preds['lm_odds_over'] = 1/model_data_2_with_preds['lm_preds']
model_data_2_with_preds['rf_odds_over'] = 1/model_data_2_with_preds['rf_preds']
model_data_2_with_preds['xgb_odds_over'] = 1/model_data_2_with_preds['xgb_preds']

In [36]:
model_data_1_with_preds['lm_odds_under'] = 1/(1-model_data_1_with_preds['lm_preds'])
model_data_1_with_preds['rf_odds_under'] = 1/(1-model_data_1_with_preds['rf_preds'])
model_data_1_with_preds['xgb_odds_under'] = 1/(1-model_data_1_with_preds['xgb_preds'])

In [37]:
model_data_2_with_preds['lm_odds_under'] = 1/(1-model_data_2_with_preds['lm_preds'])
model_data_2_with_preds['rf_odds_under'] = 1/(1-model_data_2_with_preds['rf_preds'])
model_data_2_with_preds['xgb_odds_under'] = 1/(1-model_data_2_with_preds['xgb_preds'])

In [38]:
model_data_3_with_preds = viable_matches_with_model_data3.copy()

model_data_4_with_preds = viable_matches_with_model_data4.copy()

In [39]:
model_data_1_with_preds.shape

(436, 377)

In [40]:
model_data_1_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1).shape

(306, 377)

In [41]:
first_or_last = 'first'

if first_or_last == 'first':
    
    model_data_1_with_preds_unique = model_data_1_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1)
    model_data_2_with_preds_unique = model_data_2_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1)
    model_data_2_with_preds_unique = model_data_2_with_preds_unique[model_data_2_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]

    model_data_3_with_preds_unique = model_data_3_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1)
    model_data_3_with_preds_unique = model_data_3_with_preds_unique[model_data_3_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]
    
    model_data_4_with_preds_unique = model_data_4_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1)
    model_data_4_with_preds_unique = model_data_4_with_preds_unique[model_data_4_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]

else:
    model_data_1_with_preds_unique = model_data_1_with_preds.groupby(['match_ref', 'next_prediction_time']).tail(1)
    model_data_2_with_preds_unique = model_data_2_with_preds.groupby(['match_ref', 'next_prediction_time']).tail(1)
    model_data_2_with_preds_unique = model_data_2_with_preds_unique[model_data_2_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]

    model_data_3_with_preds_unique = model_data_3_with_preds.groupby(['match_ref', 'next_prediction_time']).tail(1)
    model_data_3_with_preds_unique = model_data_3_with_preds_unique[model_data_3_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]
    
    model_data_4_with_preds_unique = model_data_4_with_preds.groupby(['match_ref', 'next_prediction_time']).tail(1)
    model_data_4_with_preds_unique = model_data_4_with_preds_unique[model_data_4_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]

In [42]:
model_data_3_with_preds_unique.shape

(202, 518)

In [43]:
model_data_3_with_preds_unique.shape

(202, 518)

In [44]:
back_lay_max_pc = 0.1
odds_gap_min = 0

model_data_1_with_preds_unique['action'] = 'None'

model_data_1_with_preds_unique.loc[
    (model_data_1_with_preds_unique['lm_odds_over']*(1+odds_gap_min)<model_data_1_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_1_with_preds_unique['rf_odds_over']*(1+odds_gap_min)<model_data_1_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_1_with_preds_unique['actual_odds_over_lay_1']/model_data_1_with_preds_unique['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_1_with_preds_unique.loc[
    (model_data_1_with_preds_unique['lm_odds_under']*(1+odds_gap_min)<model_data_1_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_1_with_preds_unique['rf_odds_under']*(1+odds_gap_min)<model_data_1_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_1_with_preds_unique['actual_odds_under_lay_1']/model_data_1_with_preds_unique['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [45]:
model_data_2_with_preds_unique['action'] = 'None'

model_data_2_with_preds_unique.loc[
    (model_data_2_with_preds_unique['lm_odds_over']*(1+odds_gap_min)<model_data_2_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_2_with_preds_unique['rf_odds_over']*(1+odds_gap_min)<model_data_2_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_2_with_preds_unique['actual_odds_over_lay_1']/model_data_2_with_preds_unique['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_2_with_preds_unique.loc[
    (model_data_2_with_preds_unique['lm_odds_under']*(1+odds_gap_min)<model_data_2_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_2_with_preds_unique['rf_odds_under']*(1+odds_gap_min)<model_data_2_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_2_with_preds_unique['actual_odds_under_lay_1']/model_data_2_with_preds_unique['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [46]:
model_data_3_with_preds_unique['action'] = 'None'

model_data_3_with_preds_unique.loc[
    (model_data_3_with_preds_unique['lm_odds_over']*(1+odds_gap_min)<model_data_3_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_3_with_preds_unique['rf_odds_over']*(1+odds_gap_min)<model_data_3_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_3_with_preds_unique['actual_odds_over_lay_1']/model_data_3_with_preds_unique['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_3_with_preds_unique.loc[
    (model_data_3_with_preds_unique['lm_odds_under']*(1+odds_gap_min)<model_data_3_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_3_with_preds_unique['rf_odds_under']*(1+odds_gap_min)<model_data_3_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_3_with_preds_unique['actual_odds_under_lay_1']/model_data_3_with_preds_unique['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [47]:
model_data_4_with_preds_unique['action'] = 'None'

model_data_4_with_preds_unique.loc[
    (model_data_4_with_preds_unique['lm_odds_over']*(1+odds_gap_min)<model_data_4_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_4_with_preds_unique['rf_odds_over']*(1+odds_gap_min)<model_data_4_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_4_with_preds_unique['actual_odds_over_lay_1']/model_data_4_with_preds_unique['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_4_with_preds_unique.loc[
    (model_data_4_with_preds_unique['lm_odds_under']*(1+odds_gap_min)<model_data_4_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_4_with_preds_unique['rf_odds_under']*(1+odds_gap_min)<model_data_4_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_4_with_preds_unique['actual_odds_under_lay_1']/model_data_4_with_preds_unique['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [48]:
# need to add outcomes
model_data_1_with_preds_unique['outcome'] = None
model_data_2_with_preds_unique['outcome'] = None
model_data_3_with_preds_unique['outcome'] = None
model_data_4_with_preds_unique['outcome'] = None
for p in prediction_times:
    mask = model_data_1_with_preds_unique['next_prediction_time']==p
    model_data_1_with_preds_unique.loc[mask, 'outcome_orig'] = (model_data_1_with_preds_unique.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1
    
    mask = model_data_2_with_preds_unique['next_prediction_time']==p
    model_data_2_with_preds_unique.loc[mask, 'outcome_orig'] = (model_data_2_with_preds_unique.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1
    
    mask = model_data_3_with_preds_unique['next_prediction_time']==p
    model_data_3_with_preds_unique.loc[mask, 'outcome_orig'] = (model_data_3_with_preds_unique.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1
    
    mask = model_data_4_with_preds_unique['next_prediction_time']==p
    model_data_4_with_preds_unique.loc[mask, 'outcome_orig'] = (model_data_4_with_preds_unique.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1


model_data_1_with_preds_unique['outcome'] = (model_data_1_with_preds_unique['total_goals_added_on_data'] > model_data_1_with_preds_unique['total_goals'])*1
model_data_2_with_preds_unique['outcome'] = (model_data_2_with_preds_unique['total_goals_added_on_data'] > model_data_2_with_preds_unique['total_goals'])*1
model_data_3_with_preds_unique['outcome'] = (model_data_3_with_preds_unique['total_goals_added_on_data'] > model_data_3_with_preds_unique['total_goals'])*1
model_data_4_with_preds_unique['outcome'] = (model_data_4_with_preds_unique['total_goals_added_on_data'] > model_data_4_with_preds_unique['total_goals'])*1

In [49]:
# sum(model_data_1_with_preds_unique['outcome']!=model_data_1_with_preds_unique['outcome_orig'])/len(model_data_1_with_preds_unique)
sum(model_data_2_with_preds_unique['outcome']!=model_data_2_with_preds_unique['outcome_orig'])/len(model_data_2_with_preds_unique)
# sum(model_data_3_with_preds_unique['outcome']!=model_data_3_with_preds_unique['outcome_orig'])/len(model_data_3_with_preds_unique)

0.013921113689095127

In [50]:
model_data_1_with_preds_unique['bet'] = 1
model_data_1_with_preds_unique.loc[model_data_1_with_preds_unique['action'] == 'None', 'bet'] = 0
model_data_1_with_preds_unique['win'] = 0
model_data_1_with_preds_unique.loc[(model_data_1_with_preds_unique['action']=='over') & (model_data_1_with_preds_unique['outcome']==1), 'win'] = 1
model_data_1_with_preds_unique.loc[(model_data_1_with_preds_unique['action']=='under') & (model_data_1_with_preds_unique['outcome']==0), 'win'] = 1

In [51]:
model_data_1_with_preds_unique['winnings'] = 0

over_win_mask = (model_data_1_with_preds_unique['action']=='over') & (model_data_1_with_preds_unique['win']==1)
model_data_1_with_preds_unique.loc[over_win_mask, 'winnings'] = model_data_1_with_preds_unique.loc[over_win_mask, 'actual_odds_over_back_3']

under_win_mask = (model_data_1_with_preds_unique['action']=='under') & (model_data_1_with_preds_unique['win']==1)
model_data_1_with_preds_unique.loc[under_win_mask, 'winnings'] = model_data_1_with_preds_unique.loc[under_win_mask, 'actual_odds_under_back_3']

In [52]:
model_data_2_with_preds_unique['bet'] = 1
model_data_2_with_preds_unique.loc[model_data_2_with_preds_unique['action'] == 'None', 'bet'] = 0
model_data_2_with_preds_unique['win'] = 0
model_data_2_with_preds_unique.loc[(model_data_2_with_preds_unique['action']=='over') & (model_data_2_with_preds_unique['outcome']==1), 'win'] = 1
model_data_2_with_preds_unique.loc[(model_data_2_with_preds_unique['action']=='under') & (model_data_2_with_preds_unique['outcome']==0), 'win'] = 1

In [53]:
model_data_2_with_preds_unique['winnings'] = 0

over_win_mask = (model_data_2_with_preds_unique['action']=='over') & (model_data_2_with_preds_unique['win']==1)
model_data_2_with_preds_unique.loc[over_win_mask, 'winnings'] = model_data_2_with_preds_unique.loc[over_win_mask, 'actual_odds_over_back_3']

under_win_mask = (model_data_2_with_preds_unique['action']=='under') & (model_data_2_with_preds_unique['win']==1)
model_data_2_with_preds_unique.loc[under_win_mask, 'winnings'] = model_data_2_with_preds_unique.loc[under_win_mask, 'actual_odds_under_back_3']

In [54]:
model_data_3_with_preds_unique['bet'] = 1
model_data_3_with_preds_unique.loc[model_data_3_with_preds_unique['action'] == 'None', 'bet'] = 0
model_data_3_with_preds_unique['win'] = 0
model_data_3_with_preds_unique.loc[(model_data_3_with_preds_unique['action']=='over') & (model_data_3_with_preds_unique['outcome']==1), 'win'] = 1
model_data_3_with_preds_unique.loc[(model_data_3_with_preds_unique['action']=='under') & (model_data_3_with_preds_unique['outcome']==0), 'win'] = 1

In [55]:
model_data_3_with_preds_unique['winnings'] = 0

over_win_mask = (model_data_3_with_preds_unique['action']=='over') & (model_data_3_with_preds_unique['win']==1)
model_data_3_with_preds_unique.loc[over_win_mask, 'winnings'] = model_data_3_with_preds_unique.loc[over_win_mask, 'actual_odds_over_back_3']

under_win_mask = (model_data_3_with_preds_unique['action']=='under') & (model_data_3_with_preds_unique['win']==1)
model_data_3_with_preds_unique.loc[under_win_mask, 'winnings'] = model_data_3_with_preds_unique.loc[under_win_mask, 'actual_odds_under_back_3']

In [56]:
model_data_4_with_preds_unique['bet'] = 1
model_data_4_with_preds_unique.loc[model_data_4_with_preds_unique['action'] == 'None', 'bet'] = 0
model_data_4_with_preds_unique['win'] = 0
model_data_4_with_preds_unique.loc[(model_data_4_with_preds_unique['action']=='over') & (model_data_4_with_preds_unique['outcome']==1), 'win'] = 1
model_data_4_with_preds_unique.loc[(model_data_4_with_preds_unique['action']=='under') & (model_data_4_with_preds_unique['outcome']==0), 'win'] = 1

In [57]:
model_data_4_with_preds_unique['winnings'] = 0

over_win_mask = (model_data_4_with_preds_unique['action']=='over') & (model_data_4_with_preds_unique['win']==1)
model_data_4_with_preds_unique.loc[over_win_mask, 'winnings'] = model_data_4_with_preds_unique.loc[over_win_mask, 'actual_odds_over_back_3']

under_win_mask = (model_data_4_with_preds_unique['action']=='under') & (model_data_4_with_preds_unique['win']==1)
model_data_4_with_preds_unique.loc[under_win_mask, 'winnings'] = model_data_4_with_preds_unique.loc[under_win_mask, 'actual_odds_under_back_3']

In [58]:
max_bet = 100
model_data_1_with_preds_unique['max_bet_amount'] = 0

under_mask = model_data_1_with_preds_unique['action']=='under'
model_data_1_with_preds_unique.loc[under_mask, 'max_bet_amount'] = model_data_1_with_preds_unique.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_1_with_preds_unique['action']=='over'
model_data_1_with_preds_unique.loc[over_mask, 'max_bet_amount'] = model_data_1_with_preds_unique.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_1_with_preds_unique['max_winnings'] = model_data_1_with_preds_unique['max_bet_amount']*model_data_1_with_preds_unique['winnings']

model_data_1_with_preds_unique['profit'] = model_data_1_with_preds_unique['winnings'] - model_data_1_with_preds_unique['bet']
model_data_1_with_preds_unique['max_profit'] = model_data_1_with_preds_unique['max_winnings'] - model_data_1_with_preds_unique['max_bet_amount']

In [59]:
model_data_2_with_preds_unique['max_bet_amount'] = 0

under_mask = model_data_2_with_preds_unique['action']=='under'
model_data_2_with_preds_unique.loc[under_mask, 'max_bet_amount'] = model_data_2_with_preds_unique.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_2_with_preds_unique['action']=='over'
model_data_2_with_preds_unique.loc[over_mask, 'max_bet_amount'] = model_data_2_with_preds_unique.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_2_with_preds_unique['max_winnings'] = model_data_2_with_preds_unique['max_bet_amount']*model_data_2_with_preds_unique['winnings']

model_data_2_with_preds_unique['profit'] = model_data_2_with_preds_unique['winnings'] - model_data_2_with_preds_unique['bet']
model_data_2_with_preds_unique['max_profit'] = model_data_2_with_preds_unique['max_winnings'] - model_data_2_with_preds_unique['max_bet_amount']

In [60]:
model_data_3_with_preds_unique['max_bet_amount'] = 0

under_mask = model_data_3_with_preds_unique['action']=='under'
model_data_3_with_preds_unique.loc[under_mask, 'max_bet_amount'] = model_data_3_with_preds_unique.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_3_with_preds_unique['action']=='over'
model_data_3_with_preds_unique.loc[over_mask, 'max_bet_amount'] = model_data_3_with_preds_unique.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_3_with_preds_unique['max_winnings'] = model_data_3_with_preds_unique['max_bet_amount']*model_data_3_with_preds_unique['winnings']

model_data_3_with_preds_unique['profit'] = model_data_3_with_preds_unique['winnings'] - model_data_3_with_preds_unique['bet']
model_data_3_with_preds_unique['max_profit'] = model_data_3_with_preds_unique['max_winnings'] - model_data_3_with_preds_unique['max_bet_amount']

In [61]:
model_data_4_with_preds_unique['max_bet_amount'] = 0

under_mask = model_data_4_with_preds_unique['action']=='under'
model_data_4_with_preds_unique.loc[under_mask, 'max_bet_amount'] = model_data_4_with_preds_unique.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_4_with_preds_unique['action']=='over'
model_data_4_with_preds_unique.loc[over_mask, 'max_bet_amount'] = model_data_4_with_preds_unique.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_4_with_preds_unique['max_winnings'] = model_data_4_with_preds_unique['max_bet_amount']*model_data_4_with_preds_unique['winnings']

model_data_4_with_preds_unique['profit'] = model_data_4_with_preds_unique['winnings'] - model_data_4_with_preds_unique['bet']
model_data_4_with_preds_unique['max_profit'] = model_data_4_with_preds_unique['max_winnings'] - model_data_4_with_preds_unique['max_bet_amount']

In [62]:
model_data_1_with_preds_unique.groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,22,0,0,0.0,0.0,0.0,0.0,0.0
50,over,18,18,13,18.57,0.57,1663.13,1668.7075,5.5775
50,under,11,11,4,17.0,6.0,766.63,1472.92,706.29
60,,20,0,0,0.0,0.0,0.0,0.0,0.0
60,over,18,18,11,17.85,-0.15,1531.98,1656.4962,124.5162
60,under,13,13,4,13.6,0.6,1050.22,1038.835,-11.385
70,,15,0,0,0.0,0.0,0.0,0.0,0.0
70,over,14,14,8,17.44,3.44,1005.1,1186.0937,180.9937
70,under,22,22,10,23.86,1.86,1643.52,1795.9906,152.4706
75,,14,0,0,0.0,0.0,0.0,0.0,0.0


In [63]:
model_data_1_with_preds_unique.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,94,0,0,0.0,0.0,0.0,0.0,0.0
over,76,76,38,78.64,2.64,6192.9,6105.6774,-87.2226
under,136,136,80,164.83,28.83,11344.79,13677.2342,2332.4442


In [64]:
model_data_2_with_preds_unique.groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,60,0,0,0.0,0.0,0.0,0.0,0.0
50,over,53,53,42,58.7,5.7,4199.27,4795.0203,595.7503
50,under,31,31,9,45.6,14.6,1746.38,2009.288,262.908
60,,58,0,0,0.0,0.0,0.0,0.0,0.0
60,over,47,47,35,55.23,8.23,3533.86,4324.8431,790.9831
60,under,39,39,16,55.94,16.94,2419.09,3396.7253,977.6353
70,,50,0,0,0.0,0.0,0.0,0.0,0.0
70,over,45,45,29,55.47,10.47,3154.42,3775.3085,620.8885
70,under,48,48,21,51.96,3.96,3113.07,3404.585,291.515
75,,48,0,0,0.0,0.0,0.0,0.0,0.0


In [65]:
model_data_2_with_preds_unique.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,328,0,0,0.0,0.0,0.0,0.0,0.0
over,250,250,149,288.12,38.12,16887.13,21032.5929,4145.4629
under,284,284,158,351.28,67.28,18759.17,23141.7965,4382.6265


In [66]:
model_data_3_with_preds_unique.groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,11,0,0,0.0,0.0,0.0,0.0,0.0
50,over,15,15,8,11.82,-3.18,1279.49,998.1186,-281.3714
50,under,7,7,2,10.25,3.25,376.97,1025.0,648.03
60,,11,0,0,0.0,0.0,0.0,0.0,0.0
60,over,15,15,7,11.43,-3.57,1138.24,894.1435,-244.0965
60,under,8,8,4,12.66,4.66,565.55,892.388,326.838
70,,8,0,0,0.0,0.0,0.0,0.0,0.0
70,over,12,12,5,10.76,-1.24,664.24,432.4776,-231.7624
70,under,14,14,7,17.1,3.1,1253.78,1536.5304,282.7504
75,,17,0,0,0.0,0.0,0.0,0.0,0.0


In [67]:
model_data_3_with_preds_unique.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,71,0,0,0.0,0.0,0.0,0.0,0.0
over,67,67,29,59.99,-7.01,4762.58,4280.6399,-481.9401
under,64,64,41,89.56,25.56,5183.73,7575.3333,2391.6033


In [68]:
model_data_4_with_preds_unique.groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,1,0,0,0.0,0.0,0.0,0.0,0.0
50,over,2,2,2,2.92,0.92,200.0,292.0,92.0
50,under,2,2,1,4.7,2.7,154.25,470.0,315.75
60,,1,0,0,0.0,0.0,0.0,0.0,0.0
60,over,1,1,1,1.57,0.57,100.0,157.0,57.0
60,under,2,2,1,3.4,1.4,112.31,340.0,227.69
70,,2,0,0,0.0,0.0,0.0,0.0,0.0
70,over,2,2,1,2.38,0.38,200.0,238.0,38.0
70,under,1,1,1,2.58,1.58,36.0,92.88,56.88
75,,1,0,0,0.0,0.0,0.0,0.0,0.0


In [69]:
model_data_4_with_preds_unique.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,8,0,0,0.0,0.0,0.0,0.0,0.0
over,7,7,5,10.17,3.17,700.0,1017.0,317.0
under,11,11,8,18.37,7.37,902.56,1671.88,769.32


In [149]:
from_date = '2023-01-11'
model_data_4_with_preds_unique[model_data_4_with_preds_unique['match_date']>=from_date].groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,4,0,0,0.0,0.0,0.0,0.0,0.0
50,over,5,5,2,2.6,-2.4,439.35,260.0,-179.35
50,under,3,3,1,6.4,3.4,147.44,640.0,492.56
60,,3,0,0,0.0,0.0,0.0,0.0,0.0
60,over,6,6,3,4.95,-1.05,542.5,380.575,-161.925
60,under,3,3,1,2.92,-0.08,300.0,292.0,-8.0
70,,3,0,0,0.0,0.0,0.0,0.0,0.0
70,over,1,1,1,3.1,2.1,29.61,91.791,62.181
70,under,8,8,4,10.0,2.0,761.29,1000.0,238.71
75,,5,0,0,0.0,0.0,0.0,0.0,0.0


In [150]:
model_data_4_with_preds_unique[model_data_4_with_preds_unique['match_date']>=from_date].groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,21,0,0,0.0,0.0,0.0,0.0,0.0
over,18,18,9,18.9,0.9,1384.26,1314.8608,-69.3992
under,33,33,21,45.89,12.89,2872.2,4171.0879,1298.8879


In [129]:
min_date = '2023-01-11' # 1 is Jan 2 to Jan 5, 2 is Jan 6 to Jan 8, 3 is Jan 9 on
max_date = '2023-01-31'

mask_1 = model_data_1_with_preds_unique['match_date'].between(min_date, max_date)
mask_2 = model_data_2_with_preds_unique['match_date'].between(min_date, max_date)
mask_3 = model_data_3_with_preds_unique['match_date'].between(min_date, max_date)
mask_4 = model_data_4_with_preds_unique['match_date'].between(min_date, max_date)

bets = sum(model_data_1_with_preds_unique.loc[mask_1, 'bet']) + sum(model_data_2_with_preds_unique.loc[mask_2, 'bet']) + sum(model_data_3_with_preds_unique.loc[mask_3, 'bet'] + sum(model_data_4_with_preds_unique.loc[mask_4, 'bet'])
wins = sum(model_data_1_with_preds_unique.loc[mask_1, 'win']) + sum(model_data_2_with_preds_unique.loc[mask_2, 'win']) + sum(model_data_3_with_preds_unique.loc[mask_3, 'win'] + sum(model_data_4_with_preds_unique.loc[mask_4, 'win'])
winnings = sum(model_data_1_with_preds_unique.loc[mask_1, 'winnings']) + sum(model_data_2_with_preds_unique.loc[mask_2, 'winnings']) + sum(model_data_3_with_preds_unique.loc[mask_3, 'winnings'] + sum(model_data_4_with_preds_unique.loc[mask_4, 'winnings'])

In [130]:
print(f'Total bets {bets}, total wins {wins}, total winnings {round(winnings,2)}, profit {round(100*(winnings-bets)/bets - 1,2)}%')

Total bets 51, total wins 30, total winnings 66.27, profit 28.94%


In [131]:
expected_wins = int(wins*bets/winnings)
expected_wins

23

In [132]:
expected_p = expected_wins/bets
expected_p

0.45098039215686275

In [133]:
actual_p = wins/bets
actual_p

0.5882352941176471

In [134]:
import scipy.stats as sps
sps.binom.cdf(wins, bets, expected_p)

0.9824240750945122

In [154]:
np.nanmin([5, 10, np.NAN])

5.0

In [None]:
pd.DataFrame

# Data issues to look into
* Duplicate matches - THIS IS BECAUSE IT RERUNS FOR ANOTHER MATCH WITHIN 1 MINUTE AND THE SAME MATCH IS STILL WITHIN THE ALLOWABLE TIME RANGE
* ODDS CHANGES A LOT IN ONE MINUTE LATE ON! - Seems like this is ok based on testing, can also focus on earlier bets if a worry
* SL and BF string matching issues
* Delay (and think about how this will affect things in practice
* Goals scored differences between data at odds time and the retrospective data collection
* Oddly large looking odds

#### Duplicate matches

In [70]:
model_data_2_with_preds_counts = model_data_2_with_preds.groupby(['match_ref', 'next_prediction_time']).size().reset_index().rename(columns={0: 'count'})
model_data_2_with_preds_with_counts = model_data_2_with_preds.merge(model_data_2_with_preds_counts, how='left', on=['match_ref', 'next_prediction_time'])
model_data_2_with_preds_with_dupes = model_data_2_with_preds_with_counts[model_data_2_with_preds_with_counts['count']>1].sort_values(['match_ref', 'match_date', 'datetime_utc'])

In [71]:
pd.options.display.max_columns=100
model_data_2_with_preds_with_dupes.iloc[:30, :100]

Unnamed: 0,match_ref,match_date,match_time,state,match_type,competition_id,competition_name,round,legs,leg,team_a_id,team_a_name,team_a_short_name,team_a_score,team_b_id,team_b_name,team_b_short_name,team_b_score,outcome,result_type,winner_id,winner_name,winner_short_name,half_time_score_home,half_time_score_away,full_time_score_home,full_time_score_away,clock,minutes_of_play,next_prediction_time,minutes_to_next_prediction_time,following_prediction_time,minutes_to_following_prediction_time,sporting_life_event_name,betfair_id,betfair_name,pred_prob_over,pred_odds_over,pred_prob_under,pred_odds_under,total_goals,market,market_type,market_id,runner_name_over,selection_id_over,actual_odds_over_back_1,size_over_back_1,actual_odds_over_back_2,size_over_back_2,actual_odds_over_back_3,size_over_back_3,runner_name_under,selection_id_under,actual_odds_under,actual_odds_under_back_1,size_under_back_1,actual_odds_under_back_2,size_under_back_2,actual_odds_under_back_3,size_under_back_3,is_delayed,delay_time,total_matched,total_available,actual_odds_under_lay_1,size_under_lay_1,actual_odds_under_lay_2,size_under_lay_2,actual_odds_under_lay_3,size_under_lay_3,actual_odds_over_lay_1,size_over_lay_1,actual_odds_over_lay_2,size_over_lay_2,actual_odds_over_lay_3,size_over_lay_3,datetime_utc,goals_pre_5m,goals_post_5m,goals_pre_10m,goals_5m_to_10m,goals_post_10m,goals_pre_15m,goals_10m_to_15m,goals_post_15m,goals_pre_20m,goals_15m_to_20m,goals_post_20m,goals_pre_25m,goals_20m_to_25m,goals_post_25m,goals_pre_30m,goals_25m_to_30m,goals_post_30m,goals_pre_35m,goals_30m_to_35m,goals_post_35m,goals_pre_40m,goals_35m_to_40m
3,144188,2023-01-07T15:30:00.000Z,15:30,SECONDHALF,Regular,46,Dutch Eredivisie,{'round_number': 1},1,1,1745,RKC Waalwijk,RKC,0,794,sc Heerenveen,Heerenveen,0,,,,,,,,,,50',50,50,0,60.0,10.0,RKC Waalwijk v sc Heerenveen,31996290,RKC Waalwijk v Heerenveen,0.4,2.5,0.6,1.666667,0,Over/Under 0.5 Goals,OVER_UNDER_05,1.208166502,Over 0.5 Goals,5851483.0,1.33,257.44,1.32,2138.13,1.31,2427.36,Under 0.5 Goals,5851482.0,,3.9,22.26,3.85,591.05,3.8,309.2,1.0,5.0,18312.12,21332.68,4.0,59.0,4.1,386.61,4.2,640.08,1.34,64.8,1.35,1105.78,1.36,2472.21,2023-01-07 16:37:07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
54,144188,2023-01-07T15:30:00.000Z,15:30,SECONDHALF,Regular,46,Dutch Eredivisie,{'round_number': 1},1,1,1745,RKC Waalwijk,RKC,0,794,sc Heerenveen,Heerenveen,0,,,,,,,,,,51',51,50,-1,60.0,9.0,RKC Waalwijk v sc Heerenveen,31996290,RKC Waalwijk v Heerenveen,0.4,2.5,0.6,1.666667,0,Over/Under 0.5 Goals,OVER_UNDER_05,1.208166502,Over 0.5 Goals,5851483.0,1.33,1534.72,1.32,1941.57,1.31,1325.92,Under 0.5 Goals,5851482.0,,3.85,73.91,3.8,296.23,3.75,594.39,1.0,5.0,19230.97,25882.33,4.0,281.38,4.1,501.91,4.2,663.83,1.34,10.58,1.36,2029.63,1.37,1512.97,2023-01-07 16:38:16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
304,144188,2023-01-07T15:30:00.000Z,15:30,SECONDHALF,Regular,46,Dutch Eredivisie,{'round_number': 1},1,1,1745,RKC Waalwijk,RKC,0,794,sc Heerenveen,Heerenveen,0,,,,,,,,,,60',60,60,0,70.0,10.0,RKC Waalwijk v sc Heerenveen,31996290,RKC Waalwijk v Heerenveen,0.4,2.5,0.6,1.666667,0,Over/Under 0.5 Goals,OVER_UNDER_05,1.208166502,Over 0.5 Goals,5851483.0,1.51,854.47,1.5,2291.66,1.49,623.38,Under 0.5 Goals,5851482.0,,2.88,23.49,2.86,98.41,2.84,263.38,1.0,5.0,30695.88,26577.05,2.96,186.92,2.98,377.31,3.0,1016.7,1.53,43.96,1.54,368.42,1.55,1256.93,2023-01-07 16:47:37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
268,144188,2023-01-07T15:30:00.000Z,15:30,SECONDHALF,Regular,46,Dutch Eredivisie,{'round_number': 1},1,1,1745,RKC Waalwijk,RKC,0,794,sc Heerenveen,Heerenveen,0,,,,,,,,,,61',61,60,-1,70.0,9.0,RKC Waalwijk v sc Heerenveen,31996290,RKC Waalwijk v Heerenveen,0.4,2.5,0.6,1.666667,0,Over/Under 0.5 Goals,OVER_UNDER_05,1.208166502,Over 0.5 Goals,5851483.0,1.52,1009.06,1.51,3927.44,1.5,1895.35,Under 0.5 Goals,5851482.0,,2.84,101.13,2.8,216.06,2.78,524.72,1.0,5.0,30811.92,24136.15,2.9,137.0,2.92,140.0,2.94,539.45,1.54,186.09,1.55,25.55,1.56,1006.74,2023-01-07 16:48:44,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
562,144192,2023-01-08T15:45:00.000Z,15:45,SECONDHALF,Regular,46,Dutch Eredivisie,{'round_number': 1},1,1,803,Excelsior,Excelsior,0,48,FC Groningen,Groningen,0,,,,,,,,,,70',70,70,0,75.0,5.0,Excelsior v FC Groningen,31997903,Excelsior v FC Groningen,0.4,2.5,0.6,1.666667,0,Over/Under 0.5 Goals,OVER_UNDER_05,1.208186889,Over 0.5 Goals,5851483.0,1.84,129.57,1.83,957.06,1.82,110.01,Under 0.5 Goals,5851482.0,,2.14,26.93,2.12,834.01,2.1,901.95,1.0,5.0,70778.59,19861.01,2.2,612.01,2.22,737.68,2.24,556.72,1.88,325.3,1.89,258.0,1.9,738.38,2023-01-08 17:14:57,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
483,144192,2023-01-08T15:45:00.000Z,15:45,SECONDHALF,Regular,46,Dutch Eredivisie,{'round_number': 1},1,1,803,Excelsior,Excelsior,1,48,FC Groningen,Groningen,0,,,,,,,,,,71',71,70,-1,75.0,4.0,Excelsior v FC Groningen,31997903,Excelsior v FC Groningen,0.4,2.5,0.6,1.666667,1,Over/Under 1.5 Goals,OVER_UNDER_15,1.208186899,Over 1.5 Goals,1221386.0,1.75,23.08,1.72,21.62,1.68,46.07,Under 1.5 Goals,1221385.0,,1.85,58.01,1.84,59.25,1.65,188.65,1.0,5.0,69995.2,4507.39,2.32,17.67,2.38,15.63,2.46,31.46,2.18,99.24,2.52,123.52,4.3,49.15,2023-01-08 17:15:59,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
28,144204,2023-01-07T20:00:00.000Z,20:00,SECONDHALF,Regular,46,Dutch Eredivisie,{'round_number': 1},1,1,33,PSV,PSV,0,798,Sparta Rotterdam,Sparta,0,,,,,,,,,,50',50,50,0,60.0,10.0,PSV v Sparta Rotterdam,31996293,PSV v Sparta Rotterdam,0.4,2.5,0.6,1.666667,0,Over/Under 0.5 Goals,OVER_UNDER_05,1.208166232,Over 0.5 Goals,5851483.0,1.18,147.79,1.17,1505.74,1.16,58.93,Under 0.5 Goals,5851482.0,,6.2,56.36,6.0,405.5,5.9,191.54,1.0,5.0,70296.13,22605.9,6.6,121.89,6.8,35.12,7.0,127.54,1.19,154.59,1.2,2165.39,1.21,3247.91,2023-01-07 21:08:44,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
180,144204,2023-01-07T20:00:00.000Z,20:00,SECONDHALF,Regular,46,Dutch Eredivisie,{'round_number': 1},1,1,33,PSV,PSV,0,798,Sparta Rotterdam,Sparta,0,,,,,,,,,,51',51,50,-1,60.0,9.0,PSV v Sparta Rotterdam,31996293,PSV v Sparta Rotterdam,0.4,2.5,0.6,1.666667,0,Over/Under 0.5 Goals,OVER_UNDER_05,1.208166232,Over 0.5 Goals,5851483.0,1.18,147.79,1.17,1505.74,1.16,58.93,Under 0.5 Goals,5851482.0,,6.2,56.36,6.0,405.5,5.9,191.54,1.0,5.0,28406.52,70597.78,6.6,121.89,6.8,35.12,7.0,127.54,1.19,154.59,1.2,2165.39,1.21,3247.91,2023-01-07 21:09:47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
513,144204,2023-01-07T20:00:00.000Z,20:00,SECONDHALF,Regular,46,Dutch Eredivisie,{'round_number': 1},1,1,33,PSV,PSV,0,798,Sparta Rotterdam,Sparta,0,,,,,,,,,,70',70,70,0,75.0,5.0,PSV v Sparta Rotterdam,31996293,PSV v Sparta Rotterdam,0.4,2.5,0.6,1.666667,0,Over/Under 0.5 Goals,OVER_UNDER_05,1.208166232,Over 0.5 Goals,5851483.0,1.62,17.26,1.61,1620.83,1.6,2969.41,Under 0.5 Goals,5851482.0,,2.6,45.14,2.58,340.11,2.56,784.88,1.0,5.0,97855.5,84482.91,2.62,873.04,2.64,1242.0,2.66,534.14,1.63,228.53,1.64,948.46,1.65,2108.95,2023-01-07 21:29:14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
500,144204,2023-01-07T20:00:00.000Z,20:00,SECONDHALF,Regular,46,Dutch Eredivisie,{'round_number': 1},1,1,33,PSV,PSV,0,798,Sparta Rotterdam,Sparta,0,,,,,,,,,,71',71,70,-1,75.0,4.0,PSV v Sparta Rotterdam,31996293,PSV v Sparta Rotterdam,0.4,2.5,0.6,1.666667,0,Over/Under 0.5 Goals,OVER_UNDER_05,1.208166232,Over 0.5 Goals,5851483.0,1.69,13.51,1.68,892.16,1.67,741.6,Under 0.5 Goals,5851482.0,,2.4,31.3,2.36,624.95,2.34,753.93,1.0,5.0,99859.1,56102.22,2.46,273.31,2.48,592.71,2.5,464.67,1.71,42.22,1.72,12.7,1.73,320.47,2023-01-07 21:30:17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Delay times

In [72]:
model_data_2_with_preds_unique['delay_time'].describe()

count    859.000000
mean       5.114086
std        0.886850
min        5.000000
25%        5.000000
50%        5.000000
75%        5.000000
max       12.000000
Name: delay_time, dtype: float64

#### Weirdly large looking odds

In [97]:
min_date = '2023-01-11' # 1 is Jan 2 to Jan 5, 2 is Jan 6 to Jan 8, 3 is Jan 9 on
max_date = '2023-01-12'
prediction_time = 80
over_under = 'over'

mask = (
    model_data_3_with_preds_unique['match_date'].between(min_date, max_date) &
    (model_data_3_with_preds_unique['next_prediction_time']==prediction_time) &
    (model_data_3_with_preds_unique['action']==over_under)
)

model_data_3_with_preds_unique.loc[
    mask,
    ['match_date', 'team_a_name', 'team_b_name', 'betfair_name', 'team_a_score', 'team_b_score', 'runner_name_over',
     f'goals_pre_{prediction_time}m', f'goals_post_{prediction_time}m', 'outcome', f'goals_pre_{prediction_time}m_added_on_data', f'goals_post_{prediction_time}m_added_on_data',
     'clock', 'lm_odds_over', 'rf_odds_over', 'lm_odds_under', 'rf_odds_under', 'actual_odds_over_back_1', 'actual_odds_under_back_1', 'delay_time',
     'team_a_score_added_on_data', 'team_b_score_added_on_data',
     'action', 'bet', 'win', 'winnings', 'max_bet_amount', 'max_winnings', 'profit','max_profit']]

Unnamed: 0,match_date,team_a_name,team_b_name,betfair_name,team_a_score,team_b_score,runner_name_over,goals_pre_80m,goals_post_80m,outcome,goals_pre_80m_added_on_data,goals_post_80m_added_on_data,clock,lm_odds_over,rf_odds_over,lm_odds_under,rf_odds_under,actual_odds_over_back_1,actual_odds_under_back_1,delay_time,team_a_score_added_on_data,team_b_score_added_on_data,action,bet,win,winnings,max_bet_amount,max_winnings,profit,max_profit
266,2023-01-11T18:00:00.000Z,Clermont,Rennes,Clermont v Rennes,1,1,Over 2.5 Goals,2,0,1,2.0,1.0,80',2.374937,2.485378,1.727306,1.673229,2.76,1.56,5,2.0,1.0,over,1,1,2.76,45.17,124.6692,1.76,79.4992
270,2023-01-11T18:00:00.000Z,Ajaccio,Reims,AC Ajaccio v Reims,0,1,Over 1.5 Goals,1,0,0,1.0,0.0,80',2.504973,2.539594,1.664464,1.649522,2.56,1.62,5,0.0,1.0,over,1,0,0.0,100.0,0.0,-1.0,-100.0
312,2023-01-11T19:45:00.000Z,Queen's Park,Raith Rovers,Queens Park v Raith,0,1,Over 1.5 Goals,0,1,1,0.0,1.0,80',2.70256,2.654838,1.587351,1.604289,9.8,1.1,5,0.0,1.0,over,1,1,9.8,24.74,242.452,8.8,217.712


#### Check test vs actual results

In [73]:
model_data_4_with_preds['action'] = 'None'

model_data_4_with_preds.loc[
    (model_data_4_with_preds['lm_odds_over']*(1+odds_gap_min)<model_data_4_with_preds['actual_odds_over_back_1']) &
    (model_data_4_with_preds['rf_odds_over']*(1+odds_gap_min)<model_data_4_with_preds['actual_odds_over_back_1']) &
    (model_data_4_with_preds['actual_odds_over_lay_1']/model_data_4_with_preds['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_4_with_preds.loc[
    (model_data_4_with_preds['lm_odds_under']*(1+odds_gap_min)<model_data_4_with_preds['actual_odds_under_back_1']) &
    (model_data_4_with_preds['rf_odds_under']*(1+odds_gap_min)<model_data_4_with_preds['actual_odds_under_back_1']) &
    (model_data_4_with_preds['actual_odds_under_lay_1']/model_data_4_with_preds['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [74]:
# need to add outcomes
model_data_4_with_preds['outcome'] = None
for p in prediction_times:
    mask = model_data_4_with_preds['next_prediction_time']==p
    model_data_4_with_preds.loc[mask, 'outcome_orig'] = (model_data_4_with_preds.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1

model_data_4_with_preds['outcome'] = (model_data_4_with_preds['total_goals_added_on_data'] > model_data_4_with_preds['total_goals'])*1

In [75]:
model_data_4_with_preds['bet'] = 1
model_data_4_with_preds.loc[model_data_4_with_preds['action'] == 'None', 'bet'] = 0
model_data_4_with_preds['win'] = 0
model_data_4_with_preds.loc[(model_data_4_with_preds['action']=='over') & (model_data_4_with_preds['outcome']==1), 'win'] = 1
model_data_4_with_preds.loc[(model_data_4_with_preds['action']=='under') & (model_data_4_with_preds['outcome']==0), 'win'] = 1

In [76]:
model_data_4_with_preds['winnings'] = 0

over_win_mask = (model_data_4_with_preds['action']=='over') & (model_data_4_with_preds['win']==1)
model_data_4_with_preds.loc[over_win_mask, 'winnings'] = model_data_4_with_preds.loc[over_win_mask, 'actual_odds_over_back_3']

under_win_mask = (model_data_4_with_preds['action']=='under') & (model_data_4_with_preds['win']==1)
model_data_4_with_preds.loc[under_win_mask, 'winnings'] = model_data_4_with_preds.loc[under_win_mask, 'actual_odds_under_back_3']

In [77]:
model_data_4_with_preds['max_bet_amount'] = 0

under_mask = model_data_4_with_preds['action']=='under'
model_data_4_with_preds.loc[under_mask, 'max_bet_amount'] = model_data_4_with_preds.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_4_with_preds['action']=='over'
model_data_4_with_preds.loc[over_mask, 'max_bet_amount'] = model_data_4_with_preds.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_4_with_preds['max_winnings'] = model_data_4_with_preds['max_bet_amount']*model_data_4_with_preds['winnings']

model_data_4_with_preds['profit'] = model_data_4_with_preds['winnings'] - model_data_4_with_preds['bet']
model_data_4_with_preds['max_profit'] = model_data_4_with_preds['max_winnings'] - model_data_4_with_preds['max_bet_amount']

In [81]:
pd.options.display.max_columns = 100
model_data_4_with_preds[['match_date', 'team_a_name', 'team_b_name', 'betfair_name', 'team_a_score', 'team_b_score', 'runner_name_over',
     #f'goals_pre_{prediction_time}m', f'goals_post_{prediction_time}m', 'outcome', f'goals_pre_{prediction_time}m_added_on_data', f'goals_post_{prediction_time}m_added_on_data',
     'clock', 'lm_odds_over', 'rf_odds_over', 'lm_odds_under', 'rf_odds_under', 'actual_odds_over_back_1', 'actual_odds_under_back_1', 'delay_time',
     'team_a_score_added_on_data', 'team_b_score_added_on_data',  'datetime_utc',
     'action', 'bet', 'win', 'winnings', 'max_bet_amount', 'max_winnings', 'profit','max_profit']].sort_values(['team_a_name', 'datetime_utc'])

Unnamed: 0,match_date,team_a_name,team_b_name,betfair_name,team_a_score,team_b_score,runner_name_over,clock,lm_odds_over,rf_odds_over,lm_odds_under,rf_odds_under,actual_odds_over_back_1,actual_odds_under_back_1,delay_time,team_a_score_added_on_data,team_b_score_added_on_data,datetime_utc,action,bet,win,winnings,max_bet_amount,max_winnings,profit,max_profit
0,2023-01-12T16:00:00.000Z,Finland,Estonia,Finland v Estonia,0,0,Over 0.5 Goals,50',1.505008,1.452128,2.980167,3.211762,1.62,2.52,5,0,1,2023-01-12 17:07:35,over,1,1,1.6,100.0,160.0,0.6,60.0
1,2023-01-12T16:00:00.000Z,Finland,Estonia,Finland v Estonia,0,0,Over 0.5 Goals,70',2.17794,1.945865,1.84894,2.057233,2.42,1.7,5,0,1,2023-01-12 17:27:23,over,1,1,2.38,100.0,238.0,1.38,138.0
2,2023-01-12T16:00:00.000Z,Finland,Estonia,Finland v Estonia,0,0,Over 0.5 Goals,80',3.238548,2.682197,1.446718,1.594461,3.4,1.4,5,0,1,2023-01-12 17:37:06,over,1,1,3.3,100.0,330.0,2.3,230.0
3,2023-01-12T17:00:00.000Z,Fiorentina,Sampdoria,Fiorentina v Sampdoria,1,0,Over 1.5 Goals,51',1.291774,1.322225,4.427311,4.103419,1.25,4.9,5,1,0,2023-01-12 18:12:27,under,1,1,4.7,100.0,470.0,3.7,370.0
4,2023-01-12T17:00:00.000Z,Fiorentina,Sampdoria,Fiorentina v Sampdoria,1,0,Over 1.5 Goals,60',1.467207,1.489377,3.140378,3.043414,1.39,3.5,5,1,0,2023-01-12 18:21:33,under,1,1,3.4,100.0,340.0,2.4,240.0
5,2023-01-12T17:00:00.000Z,Fiorentina,Sampdoria,Fiorentina v Sampdoria,1,0,Over 1.5 Goals,70',1.784971,1.811691,2.273933,2.231996,1.6,2.62,5,1,0,2023-01-12 18:31:42,under,1,1,2.58,36.0,92.88,1.58,56.88
6,2023-01-12T17:00:00.000Z,Fiorentina,Sampdoria,Fiorentina v Sampdoria,1,0,Over 1.5 Goals,80',2.578031,2.58608,1.633701,1.630485,2.44,1.68,5,1,0,2023-01-12 18:41:45,under,1,1,1.66,100.0,166.0,0.66,66.0
7,2023-01-12T17:00:00.000Z,Fiorentina,Sampdoria,Fiorentina v Sampdoria,1,0,Over 1.5 Goals,85',3.489382,3.544206,1.401706,1.39305,2.84,1.53,5,1,0,2023-01-12 18:46:53,under,1,1,1.51,100.0,151.0,0.51,51.0
16,2023-01-12T20:00:00.000Z,Fulham,Chelsea,Fulham v Chelsea,1,1,Over 2.5 Goals,50',1.300081,1.329389,4.332431,4.035927,1.34,3.85,5,2,1,2023-01-12 21:09:33,over,1,1,1.32,100.0,132.0,0.32,32.0
18,2023-01-12T20:00:00.000Z,Fulham,Chelsea,Fulham v Chelsea,1,1,Over 2.5 Goals,60',1.46094,1.515042,3.16948,2.94159,1.59,2.66,5,2,1,2023-01-12 21:19:48,over,1,1,1.57,100.0,157.0,0.57,57.0


#### Testing multiple bets at once

In [84]:
header = {'X-Application': application, 'Content-Type': 'application/x-www-form-urlencoded'}
auth = 'username='+username+'&password='+password
bet_url = "https://api.betfair.com/exchange/betting/json-rpc/v1"

In [85]:
login = requests.post('https://identitysso-cert.betfair.com/api/certlogin',
                      cert=('/etc/ssl/client-2048.crt', '/etc/ssl/client-2048.key'),
                      headers=header, data=auth, timeout=30)

if login.status_code==503: # Betfair site down code - they don't give expected time so just got to keep trying
    logging.error('Login error '+str(login.status_code))
    print('\nLogin error, trying again in 1 minute')
    time.sleep(60)

else:
    login_success = login.json()['loginStatus']
    if login_success=='TEMPORARY_BAN_TOO_MANY_REQUESTS':
        print(f'Login response is TEMPORARY_BAN_TOO_MANY_REQUESTS so continue with existing ssoid')
    elif login_success!='SUCCESS':
        print(f'Login unsuccessful due to LoginStatus: {login_success}, try to continue with existing login')
    else:
        logging.info('Login '+str(login_success))
        ssoid = login.json()['sessionToken']
        print('\nLogged in!')


Logged in!


In [177]:
headers = {'X-Application': application, 'X-Authentication': ssoid, 'content-type': 'application/json'}

events = []
event_type_id = '["1"]'
market_start_time = (datetime.datetime.now() + datetime.timedelta(hours=-10)).strftime('%Y-%m-%dT%H:%M:%SZ')
market_end_time = (datetime.datetime.now() + datetime.timedelta(hours=24)).strftime('%Y-%m-%dT%H:%M:%SZ')
inplay = 'true'

user_req='{"jsonrpc": "2.0", "method": "SportsAPING/v1.0/listEvents",\
           "params": {"filter":{"eventTypeIds":'+event_type_id+',\
           "inPlayOnly":'+inplay+', \
           "marketStartTime":{"from":"'+market_start_time+'", "to":"'+market_end_time+'"}}}, "id": 1}'

request = requests.post(bet_url, data=user_req.encode('utf-8'), headers=headers)
events += request.json()['result']

events_df = pd.DataFrame([[e['event']['id'], e['event']['name']] for e in events], columns=['betfair_id', 'betfair_name'])


In [178]:
events_df

Unnamed: 0,betfair_id,betfair_name
0,32025135,Maccabi Tzur Shalom v Ihud Bnei Shfaram
1,32025344,SC Dimona v Ironi Ashdod Sportclub
2,32023168,SC Hapoel Segev Shalom v Maccabi Ramla
3,32025152,Hapoel Tzafririm Holon v Ironi Kuseife
4,32023617,MFK Chrudim v SFC Opava
5,32025155,Hapoel Migdal Haemek v Maccabi Tamra
6,32025156,MS Tzeirey Taibe v Hapoel Raanana
7,32025158,Maccabi Shaaraim v Hapoel Bikat Hayarden
8,32023646,Viborg v Vendsyssel FF
9,32025151,Ironi Baka El Garbiya v Maccabi Nujeidat Ahmad


In [179]:
event_id1 = '32025158'

In [180]:
event_type_id = '["1"]'
match_event_id = f'["{event_id1}"]'
market_types = '["MATCH_ODDS"]'
market_start_time = (datetime.datetime.now() + datetime.timedelta(hours=-24)).strftime('%Y-%m-%dT%H:%M:%SZ')
market_end_time = (datetime.datetime.now() + datetime.timedelta(hours=24)).strftime('%Y-%m-%dT%H:%M:%SZ')
max_results = str(200)
sort_type = 'FIRST_TO_START'
metadata = '["EVENT_TYPE", "COMPETITION", "EVENT", "MARKET_START_TIME", "MARKET_DESCRIPTION", "RUNNER_DESCRIPTION"]' #, "RUNNER_METADATA"]'
inplay = 'true'

user_req='{"jsonrpc": "2.0", "method": "SportsAPING/v1.0/listMarketCatalogue",\
           "params": {"filter":{"eventTypeIds":'+event_type_id+',"marketTypeCodes":'+market_types+',\
           "inPlayOnly":'+inplay+', "eventIds":'+match_event_id+',  \
           "marketStartTime":{"from":"'+market_start_time+'", "to":"'+market_end_time+'"}},\
           "sort":"'+sort_type+'", "maxResults":"'+max_results+'", "marketProjection":'+metadata+'}, "id": 1}'

request = requests.post(bet_url, data=user_req.encode('utf-8'), headers=headers)


In [181]:
request_result = request.json()['result'][0]
request_result

{'marketId': '1.208619069',
 'marketName': 'Match Odds',
 'marketStartTime': '2023-01-13T10:48:22.000Z',
 'description': {'persistenceEnabled': True,
  'bspMarket': False,
  'marketTime': '2023-01-13T10:48:22.000Z',
  'suspendTime': '2023-01-13T10:48:22.000Z',
  'bettingType': 'ODDS',
  'turnInPlayEnabled': True,
  'marketType': 'MATCH_ODDS',
  'regulator': 'GIBRALTAR REGULATOR',
  'marketBaseRate': 2.0,
  'discountAllowed': False,
  'wallet': 'UK wallet',
  'rules': '<!--Football - Match Odds --><br>Predict the result of this match.<br> All bets apply to Full Time according to the match officials, plus any stoppage time. Extra-time/penalty shoot-outs are not included.<br><br></b>For further information please see <a href=http://content.betfair.com/aboutus/content.asp?sWhichKey=Rules%20and%20Regulations#undefined.do style=color:0163ad; text-decoration: underline; target=_blank>Rules & Regs<br><br>\n',
  'rulesHasDate': True,
  'priceLadderDescription': {'type': 'CLASSIC'}},
 'totalMatc

In [182]:
priceProjection = '["EX_BEST_OFFERS"]'
prices_req = '{"jsonrpc": "2.0", "method": "SportsAPING/v1.0/listMarketBook", "params": {"marketIds": ["' + str(request_result['marketId']) + '"],"priceProjection":{"priceData":["EX_BEST_OFFERS"]}}, "id": 1}'
request = requests.post(bet_url, data=prices_req.encode('utf-8'), headers=headers)
prices_result = request.json()['result'][0]

In [183]:
prices_result

{'marketId': '1.208619069',
 'isMarketDataDelayed': True,
 'status': 'OPEN',
 'betDelay': 5,
 'bspReconciled': False,
 'complete': True,
 'inplay': True,
 'numberOfWinners': 1,
 'numberOfRunners': 3,
 'numberOfActiveRunners': 3,
 'lastMatchTime': '2023-01-13T11:18:43.375Z',
 'totalMatched': 734.58,
 'totalAvailable': 4432.03,
 'crossMatching': True,
 'runnersVoidable': False,
 'version': 4990302803,
 'runners': [{'selectionId': 6953288,
   'handicap': 0.0,
   'status': 'ACTIVE',
   'lastPriceTraded': 1.34,
   'totalMatched': 0.0,
   'ex': {'availableToBack': [{'price': 1.31, 'size': 54.14},
     {'price': 1.3, 'size': 43.98},
     {'price': 1.29, 'size': 233.57}],
    'availableToLay': [{'price': 1.36, 'size': 79.1},
     {'price': 1.37, 'size': 83.37},
     {'price': 1.38, 'size': 103.45}],
    'tradedVolume': []}},
  {'selectionId': 10319918,
   'handicap': 0.0,
   'status': 'ACTIVE',
   'lastPriceTraded': 13.5,
   'totalMatched': 0.0,
   'ex': {'availableToBack': [{'price': 13.0, 's

In [184]:
import time

In [186]:
market_id1 = str(prices_result['marketId'])
selection_id1 = str(prices_result['runners'][0]['selectionId'])
#available = row['size_over_back_1']
bet_size1 = str(1)
price1 = str(prices_result['runners'][0]['ex']['availableToBack'][0]['price'] - 0.22222)
min_fill_size1 = str(1)

order_request1 = '{"jsonrpc": "2.0", "method": "SportsAPING/v1.0/placeOrders",\
            "params": {"marketId":"' + market_id1 + '","instructions":[\
            {"selectionId":"' + selection_id1 + '","handicap":"0","side":"BACK","orderType":"LIMIT",\
            "limitOrder":{"size":"' + bet_size1 + '","price":"' + price1 + '","persistenceType":"LAPSE",\
            "timeInForce":"FILL_OR_KILL", "minFillSize":"' + min_fill_size1 + '"}}]}, "id": 1}'

order_request1 = {"jsonrpc": "2.0", "method": "SportsAPING/v1.0/placeOrders",
            "params": {"marketId": market_id1 ,"instructions":[
            {"selectionId": selection_id1, "handicap":"0", "side":"BACK", "orderType":"LIMIT",
            "limitOrder": {"size": bet_size1, "price": price1, "persistenceType":"LAPSE",
            "timeInForce":"FILL_OR_KILL", "minFillSize": min_fill_size1}}]}, "id": 1}

market_id2 = str(prices_result['marketId'])
selection_id2 = str(prices_result['runners'][2]['selectionId'])
#available = row['size_over_back_1']
bet_size2 = str(1)
price2 = str(prices_result['runners'][2]['ex']['availableToBack'][0]['price'] - 0.2)
min_fill_size2 = str(1)

order_request2 = {"jsonrpc": "2.0", "method": "SportsAPING/v1.0/placeOrders",
            "params": {"marketId": market_id2 ,"instructions":[
            {"selectionId": selection_id2, "handicap":"0", "side":"BACK", "orderType":"LIMIT",
            "limitOrder": {"size": bet_size2, "price": price2, "persistenceType":"LAPSE",
            "timeInForce":"FILL_OR_KILL", "minFillSize": min_fill_size2}}]}, "id": 1}

order_request_comb = f'[{order_request1}, {order_request2}]'
order_request_comb = f'[{order_request2}]'
order_request_comb = str([order_request1, order_request2]).replace("'", '"')

start_time = time.time()
request = requests.post(bet_url, data=order_request_comb.encode('utf-8'), headers=headers, timeout=30)
order_result = request.json()
end_time = time.time()
print(f'Time taken: {round(end_time - start_time, 2)}s')

Time taken: 0.19s


In [188]:
price2

'4.58'

In [187]:
request.json()

[{'jsonrpc': '2.0',
  'result': {'status': 'FAILURE',
   'errorCode': 'BET_ACTION_ERROR',
   'marketId': '1.208619069',
   'instructionReports': [{'status': 'FAILURE',
     'errorCode': 'INVALID_ODDS',
     'instruction': {'selectionId': 58805,
      'handicap': 0.0,
      'limitOrder': {'size': 1.0,
       'price': 4.58,
       'minFillSize': 1.0,
       'timeInForce': 'FILL_OR_KILL'},
      'orderType': 'LIMIT',
      'side': 'BACK'}}]},
  'id': 1},
 {'jsonrpc': '2.0',
  'result': {'status': 'FAILURE',
   'errorCode': 'BET_ACTION_ERROR',
   'marketId': '1.208619069',
   'instructionReports': [{'status': 'FAILURE',
     'errorCode': 'INVALID_ODDS',
     'instruction': {'selectionId': 6953288,
      'handicap': 0.0,
      'limitOrder': {'size': 1.0,
       'price': 1.08778,
       'minFillSize': 1.0,
       'timeInForce': 'FILL_OR_KILL'},
      'orderType': 'LIMIT',
      'side': 'BACK'}}]},
  'id': 1}]

In [176]:
pd.DataFrame([], columns=['market_id', 'selection_id', 'available', 'bet_size', 'price', 'min_fill_size', 'market_version'])

Unnamed: 0,market_id,selection_id,available,bet_size,price,min_fill_size,market_version


In [158]:
test_dict = [{'test': "2.0"}, {'test': "1.0"}]
test_dict

[{'test': '2.0'}, {'test': '1.0'}]

In [159]:
str(test_dict)

"[{'test': '2.0'}, {'test': '1.0'}]"

In [160]:
order_request_comb = f'[{order_request1}, {order_request2}]'
order_request_comb

'[{"jsonrpc": "2.0", "method": "SportsAPING/v1.0/placeOrders",            "params": {"marketId":"1.208565700","instructions":[            {"selectionId":"22296239","handicap":"0","side":"BACK","orderType":"LIMIT",            "limitOrder":{"size":"1","price":"4.6","persistenceType":"LAPSE",            "timeInForce":"FILL_OR_KILL", "minFillSize":"1"}}]}, "id": 1}, {"jsonrpc": "2.0", "method": "SportsAPING/v1.0/placeOrders",            "params": {"marketId":"1.208565700","instructions":[            {"selectionId":"58805","handicap":"0","side":"BACK","orderType":"LIMIT",            "limitOrder":{"size":"1","price":"2.08","persistenceType":"LAPSE",            "timeInForce":"FILL_OR_KILL", "minFillSize":"1"}}]}, "id": 1}]'

In [214]:
def get_valid_price(p):
    # Price requirements    
    # 1.01 → 2	0.01
    # 2→ 3	0.02
    # 3 → 4	0.05
    # 4 → 6	0.1
    # 6 → 10	0.2
    # 10 → 20	0.5
    # 20 → 30	1
    # 30 → 50	2
    # 50 → 100	5
    # 100 → 1000	10
    
    if p <= 2:
        r = 0.01
        d = 2
        p = round(np.ceil(p/r)*r, d)

    elif p <= 3:
        r = 0.02
        d = 2
        p = round(np.ceil(p/r)*r, d)
        
    elif p <= 4:
        r = 0.05
        d = 2
        p = round(np.ceil(p/r)*r, d)
    
    elif p <= 6:
        r = 0.1
        d = 1
        p = round(np.ceil(p/r)*r, d)
        
    elif p <= 10:
        r = 0.2
        d = 1
        p = round(np.ceil(p/r)*r, d)
        
    elif p <= 20:
        r = 0.5
        d = 1
        p = round(np.ceil(p/r)*r, d)
        
    elif p <= 30:
        r = 1
        d = 0
        p = round(np.ceil(p/r)*r, d)
        
    elif p <= 50:
        r = 2
        d = 0
        p = round(np.ceil(p/r)*r, d)
        
    elif p <= 100:
        r = 5
        d = 0
        p = round(np.ceil(p/r)*r, d)
        
    elif p <= 1000:
        r = 10
        d = 0
        p = round(np.ceil(p/r)*r, d)
        
    else:
        return 999999
    
    return p

In [217]:
get_valid_price(20.2346)

21.0

In [204]:
3.01/0.02

150.5

In [211]:
round(np.ceil(3.01/0.02)*0.02, 2)

3.02

In [213]:
p = 3.01
r = 0.02
d = 2
p = round(np.ceil(p/r)*r, d)
p

3.02