# Get Previous Outcomes From Sporting Life

In [1]:
import requests
import urllib
from bs4 import BeautifulSoup
import json
import pandas as pd
import numpy as np
import pickle
from tqdm import tqdm_notebook, tqdm
import datetime
import time
import importlib
import config
importlib.reload(config)
from config import username, password, application, dbpw
import logging

import matplotlib.pyplot as plt
import seaborn as sns

import pymysql
import sqlalchemy

import xgboost as xgb
import statsmodels.api as sm

from Levenshtein import distance as levenshtein_distance

In [2]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
pd.options.mode.chained_assignment = None

# Get Data For Dates

In [4]:
past_dates = pd.date_range(start='2023-01-02', end='2023-01-15')

In [5]:
matches_data = []
goals_data = []
failed_dates = []

In [6]:
for d in tqdm(past_dates):
    try:
        yyyymmdd = d.strftime('%Y')+'-'+d.strftime('%m')+'-'+d.strftime('%d')
        dateurl = 'https://www.sportinglife.com/football/fixtures-results/'+yyyymmdd
        datejson = urllib.request.urlopen(dateurl).read()
        soup = BeautifulSoup(datejson)
        soup_find = soup.body.find(attrs={"type": "application/json"})
        soup_json = json.loads(soup_find.text)
        
        matches = soup_json.get('props', {}).get('pageProps', {}).get('matches', [])
        for m in matches:
            match_ref = m.get('match_reference', {}).get('id')
            match_date = m.get('match_date')
            match_time = m.get('match_time')

            matches_data.append(
                [
                    match_ref, match_date, match_time,
                    m.get('state'),
                    m.get('match_type'), 
                    m.get('competition', {}).get('competition_reference', {}).get('id'),
                    m.get('competition', {}).get('name'),
                    str(m.get('round')),
                    m.get('legs'),
                    m.get('leg'),
                    m.get('team_score_a', {}).get('team', {}).get('team_reference', {}).get('id'),
                    m.get('team_score_a', {}).get('team', {}).get('name'),
                    m.get('team_score_a', {}).get('team', {}).get('short_name'),
                    m.get('team_score_a', {}).get('score', [])[0].get('score'),
                    m.get('team_score_b', {}).get('team', {}).get('team_reference', {}).get('id'),
                    m.get('team_score_b', {}).get('team', {}).get('name'),
                    m.get('team_score_b', {}).get('team', {}).get('short_name'),
                    m.get('team_score_b', {}).get('score', [])[0].get('score'),
                    m.get('match_outcome', {}).get('outcome'),
                    m.get('match_outcome', {}).get('result_type'),
                    m.get('match_outcome', {}).get('winner', {}).get('team_reference', {}).get('id'),
                    m.get('match_outcome', {}).get('winner', {}).get('name'),
                    m.get('match_outcome', {}).get('winner', {}).get('short_name'),
                    m.get('half_time_score', {}).get('home'),
                    m.get('half_time_score', {}).get('away'),
                    m.get('full_time_score', {}).get('home'),
                    m.get('full_time_score', {}).get('away'),
                    m.get('clock')
                ]
            )

            homegoals = m.get('homeGoals', [])
            for g in homegoals:
                player = g.get('team_player')
                goal_id = g.get('id')
                for goal in g.get('goal', []):
                    goals_data.append(
                        [
                            match_ref, match_date, match_time,
                            player, goal_id,
                            goal.get('type'),
                            goal.get('time'),
                            goal.get('event_id'),
                            goal.get('event_time'),
                            'home'
                        ]
                    )

            awaygoals = m.get('awayGoals', [])
            for g in awaygoals:
                player = g.get('team_player')
                goal_id = g.get('id')
                for goal in g.get('goal', []):
                    goals_data.append(
                        [
                            match_ref, match_date, match_time,
                            player, goal_id,
                            goal.get('type'),
                            goal.get('time'),
                            goal.get('event_id'),
                            goal.get('event_time'),
                            'away'
                        ]
                    )
        
    except:
        failed_dates.append(d)

100%|██████████| 14/14 [00:07<00:00,  1.76it/s]


In [7]:
matches_cols = [
    'match_ref', 'match_date', 'match_time', 'state', 'match_type', 'competition_id', 'competition_name', 
    'round', 'legs', 'leg', 'team_a_id', 'team_a_name', 'team_a_short_name', 'team_a_score',
    'team_b_id', 'team_b_name', 'team_b_short_name', 'team_b_score',
    'outcome', 'result_type', 'winner_id', 'winner_name', 'winner_short_name',
    'half_time_score_home', 'half_time_score_away',
    'full_time_score_home', 'full_time_score_away', 'clock'
]
matches_df = pd.DataFrame(matches_data, columns=matches_cols)

In [8]:
goals_cols = [
    'match_ref', 'match_date', 'match_time', 'player', 'goal_id', 'type', 'time', 'event_id', 'event_time', 'side'
]
goals_df = pd.DataFrame(goals_data, columns=goals_cols)

In [9]:
def get_extra_time(t):
    time_split = t.replace("'", "").split("+")
    if len(time_split)>1:
        return int(time_split[1])
    else:
        return 0

goals_df['time_regular'] = goals_df['time'].apply(lambda x: int(x.replace("'", "").split("+")[0]))
goals_df['time_extra'] = goals_df['time'].apply(get_extra_time)

In [10]:
matches_df['year'] = matches_df['match_date'].apply(lambda x: x[:4])

In [11]:
time_cutoffs = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90]
cap_at = 90
time_features = []
for i, t in enumerate(time_cutoffs):
    goals_df[f'goals_pre_{t}m'] = ((goals_df[f'time_regular']<t) & (goals_df[f'time_regular']<=cap_at))*1
    time_features += [f'goals_pre_{t}m']
    if t > min(time_cutoffs):
        goals_df[f'goals_{time_cutoffs[i-1]}m_to_{t}m'] = (
            goals_df[f'goals_pre_{t}m'] - goals_df[f'goals_pre_{time_cutoffs[i-1]}m'])
        time_features += [f'goals_{time_cutoffs[i-1]}m_to_{t}m']
    
    goals_df[f'goals_post_{t}m'] = ((goals_df[f'time_regular']>=t) & (goals_df[f'time_regular']<=cap_at))*1
    time_features += [f'goals_post_{t}m']

# same for home and away goals
time_features_home_away = []
for i, t in enumerate(time_cutoffs):
    goals_df[f'home_goals_pre_{t}m'] = ((goals_df[f'time_regular']<t) & (goals_df[f'time_regular']<=cap_at) & (goals_df[f'side']=='home'))*1
    time_features_home_away += [f'home_goals_pre_{t}m']
    
    goals_df[f'away_goals_pre_{t}m'] = ((goals_df[f'time_regular']<t) & (goals_df[f'time_regular']<=cap_at) & (goals_df[f'side']=='away'))*1
    time_features_home_away += [f'away_goals_pre_{t}m']
    
    if t > min(time_cutoffs):
        goals_df[f'home_goals_{time_cutoffs[i-1]}m_to_{t}m'] = (
            goals_df[f'home_goals_pre_{t}m'] - goals_df[f'home_goals_pre_{time_cutoffs[i-1]}m'])
        time_features_home_away += [f'home_goals_{time_cutoffs[i-1]}m_to_{t}m']
        
        goals_df[f'away_goals_{time_cutoffs[i-1]}m_to_{t}m'] = (
            goals_df[f'away_goals_pre_{t}m'] - goals_df[f'away_goals_pre_{time_cutoffs[i-1]}m'])
        time_features_home_away += [f'away_goals_{time_cutoffs[i-1]}m_to_{t}m']
    
    goals_df[f'home_goals_post_{t}m'] = ((goals_df[f'time_regular']>=t) & (goals_df[f'time_regular']<=cap_at) & (goals_df[f'side']=='home'))*1
    time_features_home_away += [f'home_goals_post_{t}m']
    
    goals_df[f'away_goals_post_{t}m'] = ((goals_df[f'time_regular']>=t) & (goals_df[f'time_regular']<=cap_at) & (goals_df[f'side']=='away'))*1
    time_features_home_away += [f'away_goals_post_{t}m']

In [12]:
goals_features = goals_df.groupby(['match_ref', 'match_date'])[time_features+time_features_home_away].sum().reset_index()

In [13]:
model_data = matches_df.merge(goals_features, how='left', on=['match_ref', 'match_date'])

for f in time_features + time_features_home_away:
    model_data[f] = model_data[f].fillna(0)
    
for t in time_cutoffs:
    model_data[f'goal_diff_at_{t}'] = model_data[f'home_goals_pre_{t}m'] - model_data[f'away_goals_pre_{t}m']
    model_data[f'abs_goal_diff_at_{t}'] = abs(model_data[f'goal_diff_at_{t}'])

In [14]:
# check only include games with no goals data if score is 0 - 0
model_data['goals_data_num_goals'] = model_data['goals_pre_90m'] + model_data['goals_post_90m']
model_data['matches_data_num_goals'] = model_data['team_a_score'] + model_data['team_b_score']

In [15]:
model_data_no_goals_data = model_data[model_data['goals_data_num_goals'] != model_data['matches_data_num_goals']]
model_data = model_data[model_data['goals_data_num_goals'] == model_data['matches_data_num_goals']]

In [16]:
model_data = model_data[model_data['half_time_score_home'].notnull()]

In [17]:
model_data['any_goal_post_50'] = (model_data['goals_post_50m']>0)*1
model_data['any_goal_post_60'] = (model_data['goals_post_60m']>0)*1
model_data['any_goal_post_70'] = (model_data['goals_post_70m']>0)*1
model_data['any_goal_post_75'] = (model_data['goals_post_75m']>0)*1
model_data['any_goal_post_80'] = (model_data['goals_post_80m']>0)*1
model_data['any_goal_post_85'] = (model_data['goals_post_85m']>0)*1

In [18]:
model_data['total_goals'] = model_data['team_a_score'] + model_data['team_b_score']
model_data['score_diff'] = model_data['team_a_score'] - model_data['team_b_score']
model_data['score_diff_abs'] = abs(model_data['score_diff'])

In [19]:
model_data['year'] = model_data['match_date'].apply(lambda x: x[:4]).astype(int)
model_data['month'] = model_data['match_date'].apply(lambda x: x[5:7]).astype(int)

In [20]:
model_data.shape

(472, 236)

# Get Data from DB

In [21]:
connect_string = 'mysql+pymysql://root:'+dbpw+'@localhost/sl_bf_late_goals'
sql_engine = sqlalchemy.create_engine(connect_string)
testing_viable_matches = pd.read_sql('''SELECT * FROM testing_viable_matches''', con=sql_engine)
testing2_viable_matches = pd.read_sql('''SELECT * FROM testing2_viable_matches''', con=sql_engine)
testing_model_data = pd.read_sql('''SELECT * FROM testing_model_data''', con=sql_engine)
testing2_model_data = pd.read_sql('''SELECT * FROM testing2_model_data''', con=sql_engine)

In [22]:
testing_viable_matches.shape

(461, 78)

In [23]:
testing2_viable_matches.shape

(1299, 78)

In [24]:
select_cols1 = f'select v.{testing_viable_matches.columns[0]}'
for c in testing_viable_matches.columns[1:]:
    select_cols1 += f', v.{c}'
for c in testing_model_data.columns:
    if c not in testing_viable_matches.columns:
        select_cols1 += f', m.{c}'
        
select_cols2 = f'select v.{testing2_viable_matches.columns[0]}'
for c in testing2_viable_matches.columns[1:]:
    select_cols2 += f', v.{c}'
for c in testing2_model_data.columns:
    if c not in testing2_viable_matches.columns:
        select_cols2 += f', m.{c}'

In [25]:
viable_matches_with_model_data = pd.read_sql(f'''{select_cols1} FROM testing_viable_matches v LEFT JOIN testing_model_data m ON v.match_ref = m.match_ref and v.datetime_utc = m.datetime_utc''', con=sql_engine)
viable_matches_with_model_data2 = pd.read_sql(f'''{select_cols2} FROM testing2_viable_matches v LEFT JOIN testing2_model_data m ON v.match_ref = m.match_ref and v.datetime_utc = m.datetime_utc''', con=sql_engine)
viable_matches_with_model_data3 = pd.read_sql(f'''SELECT * FROM testing3_model_data_with_preds''', con=sql_engine)
viable_matches_with_model_data4 = pd.read_sql(f'''SELECT * FROM testing_live_model_data_with_preds''', con=sql_engine)

In [26]:
viable_matches_with_model_data.shape

(461, 131)

In [27]:
viable_matches_with_model_data2.shape

(1299, 277)

In [28]:
viable_matches_with_model_data3.shape

(360, 283)

In [29]:
viable_matches_with_model_data3.shape

(360, 283)

In [30]:
# add on the extra features needed for the original v1 data collected
viable_matches_with_model_data1 = viable_matches_with_model_data.merge(model_data, how='left', on='match_ref', suffixes=('', '_added_on_data'))
viable_matches_with_model_data2 = viable_matches_with_model_data2.merge(model_data, how='left', on='match_ref', suffixes=('', '_added_on_data'))
viable_matches_with_model_data3 = viable_matches_with_model_data3.merge(model_data, how='left', on='match_ref', suffixes=('', '_added_on_data'))
viable_matches_with_model_data4 = viable_matches_with_model_data4.merge(model_data, how='left', on='match_ref', suffixes=('', '_added_on_data'))

# Get models and do predictions

In [31]:
# # football models
# with open('/home/angus/projects/betting/football/models/late_goals_test_model_1.pickle', 'rb') as f:
#     test_model_1_80m = pickle.load(f)
with open('/home/angus/projects/betting/football/models/late_goals_test_models_2.pickle', 'rb') as f:
    models_dicts = pickle.load(f)

In [32]:
models_dicts

{'any_goal_post_50': {'lin_mod': <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7f5dccee8470>,
  'rf_mod': RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                         max_depth=6, max_features='auto', max_leaf_nodes=None,
                         min_impurity_decrease=0.0, min_impurity_split=None,
                         min_samples_leaf=2, min_samples_split=2,
                         min_weight_fraction_leaf=0.0, n_estimators=16,
                         n_jobs=None, oob_score=False, random_state=None,
                         verbose=0, warm_start=False),
  'xgb_mod': XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
                colsample_bynode=1, colsample_bytree=1, gamma=0,
                learning_rate=0.1, max_delta_step=0, max_depth=2,
                min_child_weight=1, missing=nan, n_estimators=4, n_jobs=1,
                nthread=None, objective='binary:logistic', random_state=0,
          

In [33]:
prediction_times = [50, 60, 70, 75, 80, 85]
model_data_1_with_preds = []
model_data_2_with_preds = []
for p in prediction_times:
    model_data_1_sub = viable_matches_with_model_data1[viable_matches_with_model_data1['next_prediction_time']==p]
    model_data_2_sub = viable_matches_with_model_data2[viable_matches_with_model_data2['next_prediction_time']==p]

    train_rc_comp = models_dicts[f'any_goal_post_{p}']['train_rc_comp']

    model_data_1_sub['competition_name_rc'] = model_data_1_sub['competition_name']
    model_data_1_sub.loc[~model_data_1_sub['competition_name'].isin(train_rc_comp['competition_name_rc']), 'competition_name_rc'] = 'Other'
    model_data_1_sub = model_data_1_sub.merge(train_rc_comp, how='left', on='competition_name_rc')

    model_data_2_sub['competition_name_rc'] = model_data_2_sub['competition_name']
    model_data_2_sub.loc[~model_data_2_sub['competition_name'].isin(train_rc_comp['competition_name_rc']), 'competition_name_rc'] = 'Other'
    model_data_2_sub = model_data_2_sub.merge(train_rc_comp, how='left', on='competition_name_rc')
    
    model_data_1_sub = model_data_1_sub[model_data_1_sub[models_dicts[f'any_goal_post_{p}']['features']].isnull().sum(axis=1)==0]
    model_data_2_sub = model_data_2_sub[model_data_2_sub[models_dicts[f'any_goal_post_{p}']['features']].isnull().sum(axis=1)==0]
    
    if len(model_data_1_sub) > 0:
        model_data_1_sub['lm_preds'] = models_dicts[f'any_goal_post_{p}']['lin_mod'].predict(sm.add_constant(model_data_1_sub[models_dicts[f'any_goal_post_{p}']['features']], has_constant='add'))
        model_data_1_sub['rf_preds'] = models_dicts[f'any_goal_post_{p}']['rf_mod'].predict_proba(model_data_1_sub[models_dicts[f'any_goal_post_{p}']['features']])[:, 1]
        model_data_1_sub['xgb_preds'] = models_dicts[f'any_goal_post_{p}']['xgb_mod'].predict_proba(model_data_1_sub[models_dicts[f'any_goal_post_{p}']['features']])[:, 1]
    
    if len(model_data_2_sub) > 0:
        model_data_2_sub['lm_preds'] = models_dicts[f'any_goal_post_{p}']['lin_mod'].predict(sm.add_constant(model_data_2_sub[models_dicts[f'any_goal_post_{p}']['features']], has_constant='add'))
        model_data_2_sub['rf_preds'] = models_dicts[f'any_goal_post_{p}']['rf_mod'].predict_proba(model_data_2_sub[models_dicts[f'any_goal_post_{p}']['features']])[:, 1]
        model_data_2_sub['xgb_preds'] = models_dicts[f'any_goal_post_{p}']['xgb_mod'].predict_proba(model_data_2_sub[models_dicts[f'any_goal_post_{p}']['features']])[:, 1]
    
    model_data_1_with_preds.append(model_data_1_sub)
    model_data_2_with_preds.append(model_data_2_sub)
    
model_data_1_with_preds = pd.concat(model_data_1_with_preds, axis=0)
model_data_2_with_preds = pd.concat(model_data_2_with_preds, axis=0)

  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)


In [34]:
model_data_1_with_preds['lm_odds_over'] = 1/model_data_1_with_preds['lm_preds']
model_data_1_with_preds['rf_odds_over'] = 1/model_data_1_with_preds['rf_preds']
model_data_1_with_preds['xgb_odds_over'] = 1/model_data_1_with_preds['xgb_preds']

In [35]:
model_data_2_with_preds['lm_odds_over'] = 1/model_data_2_with_preds['lm_preds']
model_data_2_with_preds['rf_odds_over'] = 1/model_data_2_with_preds['rf_preds']
model_data_2_with_preds['xgb_odds_over'] = 1/model_data_2_with_preds['xgb_preds']

In [36]:
model_data_1_with_preds['lm_odds_under'] = 1/(1-model_data_1_with_preds['lm_preds'])
model_data_1_with_preds['rf_odds_under'] = 1/(1-model_data_1_with_preds['rf_preds'])
model_data_1_with_preds['xgb_odds_under'] = 1/(1-model_data_1_with_preds['xgb_preds'])

In [37]:
model_data_2_with_preds['lm_odds_under'] = 1/(1-model_data_2_with_preds['lm_preds'])
model_data_2_with_preds['rf_odds_under'] = 1/(1-model_data_2_with_preds['rf_preds'])
model_data_2_with_preds['xgb_odds_under'] = 1/(1-model_data_2_with_preds['xgb_preds'])

In [38]:
model_data_3_with_preds = viable_matches_with_model_data3.copy()

model_data_4_with_preds = viable_matches_with_model_data4.copy()

In [39]:
model_data_1_with_preds.shape

(436, 377)

In [40]:
model_data_1_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1).shape

(306, 377)

In [41]:
first_or_last = 'first'

if first_or_last == 'first':
    
    model_data_1_with_preds_unique = model_data_1_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1)
    model_data_2_with_preds_unique = model_data_2_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1)
    model_data_2_with_preds_unique = model_data_2_with_preds_unique[model_data_2_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]

    model_data_3_with_preds_unique = model_data_3_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1)
    model_data_3_with_preds_unique = model_data_3_with_preds_unique[model_data_3_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]
    
    model_data_4_with_preds_unique = model_data_4_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1)
    model_data_4_with_preds_unique = model_data_4_with_preds_unique[model_data_4_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]

else:
    model_data_1_with_preds_unique = model_data_1_with_preds.groupby(['match_ref', 'next_prediction_time']).tail(1)
    model_data_2_with_preds_unique = model_data_2_with_preds.groupby(['match_ref', 'next_prediction_time']).tail(1)
    model_data_2_with_preds_unique = model_data_2_with_preds_unique[model_data_2_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]

    model_data_3_with_preds_unique = model_data_3_with_preds.groupby(['match_ref', 'next_prediction_time']).tail(1)
    model_data_3_with_preds_unique = model_data_3_with_preds_unique[model_data_3_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]
    
    model_data_4_with_preds_unique = model_data_4_with_preds.groupby(['match_ref', 'next_prediction_time']).tail(1)
    model_data_4_with_preds_unique = model_data_4_with_preds_unique[model_data_4_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]

In [42]:
model_data_3_with_preds_unique.shape

(202, 518)

In [43]:
model_data_3_with_preds_unique.shape

(202, 518)

In [44]:
back_lay_max_pc = 0.1
odds_gap_min = 0

model_data_1_with_preds_unique['action'] = 'None'

model_data_1_with_preds_unique.loc[
    (model_data_1_with_preds_unique['lm_odds_over']*(1+odds_gap_min)<model_data_1_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_1_with_preds_unique['rf_odds_over']*(1+odds_gap_min)<model_data_1_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_1_with_preds_unique['actual_odds_over_lay_1']/model_data_1_with_preds_unique['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_1_with_preds_unique.loc[
    (model_data_1_with_preds_unique['lm_odds_under']*(1+odds_gap_min)<model_data_1_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_1_with_preds_unique['rf_odds_under']*(1+odds_gap_min)<model_data_1_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_1_with_preds_unique['actual_odds_under_lay_1']/model_data_1_with_preds_unique['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [45]:
model_data_2_with_preds_unique['action'] = 'None'

model_data_2_with_preds_unique.loc[
    (model_data_2_with_preds_unique['lm_odds_over']*(1+odds_gap_min)<model_data_2_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_2_with_preds_unique['rf_odds_over']*(1+odds_gap_min)<model_data_2_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_2_with_preds_unique['actual_odds_over_lay_1']/model_data_2_with_preds_unique['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_2_with_preds_unique.loc[
    (model_data_2_with_preds_unique['lm_odds_under']*(1+odds_gap_min)<model_data_2_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_2_with_preds_unique['rf_odds_under']*(1+odds_gap_min)<model_data_2_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_2_with_preds_unique['actual_odds_under_lay_1']/model_data_2_with_preds_unique['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [46]:
model_data_3_with_preds_unique['action'] = 'None'

model_data_3_with_preds_unique.loc[
    (model_data_3_with_preds_unique['lm_odds_over']*(1+odds_gap_min)<model_data_3_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_3_with_preds_unique['rf_odds_over']*(1+odds_gap_min)<model_data_3_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_3_with_preds_unique['actual_odds_over_lay_1']/model_data_3_with_preds_unique['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_3_with_preds_unique.loc[
    (model_data_3_with_preds_unique['lm_odds_under']*(1+odds_gap_min)<model_data_3_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_3_with_preds_unique['rf_odds_under']*(1+odds_gap_min)<model_data_3_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_3_with_preds_unique['actual_odds_under_lay_1']/model_data_3_with_preds_unique['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [47]:
model_data_4_with_preds_unique['action'] = 'None'

model_data_4_with_preds_unique.loc[
    (model_data_4_with_preds_unique['lm_odds_over']*(1+odds_gap_min)<model_data_4_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_4_with_preds_unique['rf_odds_over']*(1+odds_gap_min)<model_data_4_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_4_with_preds_unique['actual_odds_over_lay_1']/model_data_4_with_preds_unique['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_4_with_preds_unique.loc[
    (model_data_4_with_preds_unique['lm_odds_under']*(1+odds_gap_min)<model_data_4_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_4_with_preds_unique['rf_odds_under']*(1+odds_gap_min)<model_data_4_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_4_with_preds_unique['actual_odds_under_lay_1']/model_data_4_with_preds_unique['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [48]:
# need to add outcomes
model_data_1_with_preds_unique['outcome'] = None
model_data_2_with_preds_unique['outcome'] = None
model_data_3_with_preds_unique['outcome'] = None
model_data_4_with_preds_unique['outcome'] = None
for p in prediction_times:
    mask = model_data_1_with_preds_unique['next_prediction_time']==p
    model_data_1_with_preds_unique.loc[mask, 'outcome_orig'] = (model_data_1_with_preds_unique.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1
    
    mask = model_data_2_with_preds_unique['next_prediction_time']==p
    model_data_2_with_preds_unique.loc[mask, 'outcome_orig'] = (model_data_2_with_preds_unique.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1
    
    mask = model_data_3_with_preds_unique['next_prediction_time']==p
    model_data_3_with_preds_unique.loc[mask, 'outcome_orig'] = (model_data_3_with_preds_unique.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1
    
    mask = model_data_4_with_preds_unique['next_prediction_time']==p
    model_data_4_with_preds_unique.loc[mask, 'outcome_orig'] = (model_data_4_with_preds_unique.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1


model_data_1_with_preds_unique['outcome'] = (model_data_1_with_preds_unique['total_goals_added_on_data'] > model_data_1_with_preds_unique['total_goals'])*1
model_data_2_with_preds_unique['outcome'] = (model_data_2_with_preds_unique['total_goals_added_on_data'] > model_data_2_with_preds_unique['total_goals'])*1
model_data_3_with_preds_unique['outcome'] = (model_data_3_with_preds_unique['total_goals_added_on_data'] > model_data_3_with_preds_unique['total_goals'])*1
model_data_4_with_preds_unique['outcome'] = (model_data_4_with_preds_unique['total_goals_added_on_data'] > model_data_4_with_preds_unique['total_goals'])*1

In [49]:
# sum(model_data_1_with_preds_unique['outcome']!=model_data_1_with_preds_unique['outcome_orig'])/len(model_data_1_with_preds_unique)
sum(model_data_2_with_preds_unique['outcome']!=model_data_2_with_preds_unique['outcome_orig'])/len(model_data_2_with_preds_unique)
# sum(model_data_3_with_preds_unique['outcome']!=model_data_3_with_preds_unique['outcome_orig'])/len(model_data_3_with_preds_unique)

0.013921113689095127

In [50]:
model_data_1_with_preds_unique['bet'] = 1
model_data_1_with_preds_unique.loc[model_data_1_with_preds_unique['action'] == 'None', 'bet'] = 0
model_data_1_with_preds_unique['win'] = 0
model_data_1_with_preds_unique.loc[(model_data_1_with_preds_unique['action']=='over') & (model_data_1_with_preds_unique['outcome']==1), 'win'] = 1
model_data_1_with_preds_unique.loc[(model_data_1_with_preds_unique['action']=='under') & (model_data_1_with_preds_unique['outcome']==0), 'win'] = 1

In [51]:
model_data_1_with_preds_unique['winnings'] = 0

over_win_mask = (model_data_1_with_preds_unique['action']=='over') & (model_data_1_with_preds_unique['win']==1)
model_data_1_with_preds_unique.loc[over_win_mask, 'winnings'] = model_data_1_with_preds_unique.loc[over_win_mask, 'actual_odds_over_back_3']

under_win_mask = (model_data_1_with_preds_unique['action']=='under') & (model_data_1_with_preds_unique['win']==1)
model_data_1_with_preds_unique.loc[under_win_mask, 'winnings'] = model_data_1_with_preds_unique.loc[under_win_mask, 'actual_odds_under_back_3']

In [52]:
model_data_2_with_preds_unique['bet'] = 1
model_data_2_with_preds_unique.loc[model_data_2_with_preds_unique['action'] == 'None', 'bet'] = 0
model_data_2_with_preds_unique['win'] = 0
model_data_2_with_preds_unique.loc[(model_data_2_with_preds_unique['action']=='over') & (model_data_2_with_preds_unique['outcome']==1), 'win'] = 1
model_data_2_with_preds_unique.loc[(model_data_2_with_preds_unique['action']=='under') & (model_data_2_with_preds_unique['outcome']==0), 'win'] = 1

In [53]:
model_data_2_with_preds_unique['winnings'] = 0

over_win_mask = (model_data_2_with_preds_unique['action']=='over') & (model_data_2_with_preds_unique['win']==1)
model_data_2_with_preds_unique.loc[over_win_mask, 'winnings'] = model_data_2_with_preds_unique.loc[over_win_mask, 'actual_odds_over_back_3']

under_win_mask = (model_data_2_with_preds_unique['action']=='under') & (model_data_2_with_preds_unique['win']==1)
model_data_2_with_preds_unique.loc[under_win_mask, 'winnings'] = model_data_2_with_preds_unique.loc[under_win_mask, 'actual_odds_under_back_3']

In [54]:
model_data_3_with_preds_unique['bet'] = 1
model_data_3_with_preds_unique.loc[model_data_3_with_preds_unique['action'] == 'None', 'bet'] = 0
model_data_3_with_preds_unique['win'] = 0
model_data_3_with_preds_unique.loc[(model_data_3_with_preds_unique['action']=='over') & (model_data_3_with_preds_unique['outcome']==1), 'win'] = 1
model_data_3_with_preds_unique.loc[(model_data_3_with_preds_unique['action']=='under') & (model_data_3_with_preds_unique['outcome']==0), 'win'] = 1

In [55]:
model_data_3_with_preds_unique['winnings'] = 0

over_win_mask = (model_data_3_with_preds_unique['action']=='over') & (model_data_3_with_preds_unique['win']==1)
model_data_3_with_preds_unique.loc[over_win_mask, 'winnings'] = model_data_3_with_preds_unique.loc[over_win_mask, 'actual_odds_over_back_3']

under_win_mask = (model_data_3_with_preds_unique['action']=='under') & (model_data_3_with_preds_unique['win']==1)
model_data_3_with_preds_unique.loc[under_win_mask, 'winnings'] = model_data_3_with_preds_unique.loc[under_win_mask, 'actual_odds_under_back_3']

In [56]:
model_data_4_with_preds_unique['bet'] = 1
model_data_4_with_preds_unique.loc[model_data_4_with_preds_unique['action'] == 'None', 'bet'] = 0
model_data_4_with_preds_unique['win'] = 0
model_data_4_with_preds_unique.loc[(model_data_4_with_preds_unique['action']=='over') & (model_data_4_with_preds_unique['outcome']==1), 'win'] = 1
model_data_4_with_preds_unique.loc[(model_data_4_with_preds_unique['action']=='under') & (model_data_4_with_preds_unique['outcome']==0), 'win'] = 1

In [57]:
model_data_4_with_preds_unique['winnings'] = 0

over_win_mask = (model_data_4_with_preds_unique['action']=='over') & (model_data_4_with_preds_unique['win']==1)
model_data_4_with_preds_unique.loc[over_win_mask, 'winnings'] = model_data_4_with_preds_unique.loc[over_win_mask, 'actual_odds_over_back_3']

under_win_mask = (model_data_4_with_preds_unique['action']=='under') & (model_data_4_with_preds_unique['win']==1)
model_data_4_with_preds_unique.loc[under_win_mask, 'winnings'] = model_data_4_with_preds_unique.loc[under_win_mask, 'actual_odds_under_back_3']

In [58]:
max_bet = 100
model_data_1_with_preds_unique['max_bet_amount'] = 0

under_mask = model_data_1_with_preds_unique['action']=='under'
model_data_1_with_preds_unique.loc[under_mask, 'max_bet_amount'] = model_data_1_with_preds_unique.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_1_with_preds_unique['action']=='over'
model_data_1_with_preds_unique.loc[over_mask, 'max_bet_amount'] = model_data_1_with_preds_unique.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_1_with_preds_unique['max_winnings'] = model_data_1_with_preds_unique['max_bet_amount']*model_data_1_with_preds_unique['winnings']

model_data_1_with_preds_unique['profit'] = model_data_1_with_preds_unique['winnings'] - model_data_1_with_preds_unique['bet']
model_data_1_with_preds_unique['max_profit'] = model_data_1_with_preds_unique['max_winnings'] - model_data_1_with_preds_unique['max_bet_amount']

In [59]:
model_data_2_with_preds_unique['max_bet_amount'] = 0

under_mask = model_data_2_with_preds_unique['action']=='under'
model_data_2_with_preds_unique.loc[under_mask, 'max_bet_amount'] = model_data_2_with_preds_unique.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_2_with_preds_unique['action']=='over'
model_data_2_with_preds_unique.loc[over_mask, 'max_bet_amount'] = model_data_2_with_preds_unique.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_2_with_preds_unique['max_winnings'] = model_data_2_with_preds_unique['max_bet_amount']*model_data_2_with_preds_unique['winnings']

model_data_2_with_preds_unique['profit'] = model_data_2_with_preds_unique['winnings'] - model_data_2_with_preds_unique['bet']
model_data_2_with_preds_unique['max_profit'] = model_data_2_with_preds_unique['max_winnings'] - model_data_2_with_preds_unique['max_bet_amount']

In [60]:
model_data_3_with_preds_unique['max_bet_amount'] = 0

under_mask = model_data_3_with_preds_unique['action']=='under'
model_data_3_with_preds_unique.loc[under_mask, 'max_bet_amount'] = model_data_3_with_preds_unique.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_3_with_preds_unique['action']=='over'
model_data_3_with_preds_unique.loc[over_mask, 'max_bet_amount'] = model_data_3_with_preds_unique.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_3_with_preds_unique['max_winnings'] = model_data_3_with_preds_unique['max_bet_amount']*model_data_3_with_preds_unique['winnings']

model_data_3_with_preds_unique['profit'] = model_data_3_with_preds_unique['winnings'] - model_data_3_with_preds_unique['bet']
model_data_3_with_preds_unique['max_profit'] = model_data_3_with_preds_unique['max_winnings'] - model_data_3_with_preds_unique['max_bet_amount']

In [61]:
model_data_4_with_preds_unique['max_bet_amount'] = 0

under_mask = model_data_4_with_preds_unique['action']=='under'
model_data_4_with_preds_unique.loc[under_mask, 'max_bet_amount'] = model_data_4_with_preds_unique.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_4_with_preds_unique['action']=='over'
model_data_4_with_preds_unique.loc[over_mask, 'max_bet_amount'] = model_data_4_with_preds_unique.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_4_with_preds_unique['max_winnings'] = model_data_4_with_preds_unique['max_bet_amount']*model_data_4_with_preds_unique['winnings']

model_data_4_with_preds_unique['profit'] = model_data_4_with_preds_unique['winnings'] - model_data_4_with_preds_unique['bet']
model_data_4_with_preds_unique['max_profit'] = model_data_4_with_preds_unique['max_winnings'] - model_data_4_with_preds_unique['max_bet_amount']

In [62]:
model_data_1_with_preds_unique.groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,22,0,0,0.0,0.0,0.0,0.0,0.0
50,over,18,18,13,18.57,0.57,1663.13,1668.7075,5.5775
50,under,11,11,4,17.0,6.0,766.63,1472.92,706.29
60,,20,0,0,0.0,0.0,0.0,0.0,0.0
60,over,18,18,11,17.85,-0.15,1531.98,1656.4962,124.5162
60,under,13,13,4,13.6,0.6,1050.22,1038.835,-11.385
70,,15,0,0,0.0,0.0,0.0,0.0,0.0
70,over,14,14,8,17.44,3.44,1005.1,1186.0937,180.9937
70,under,22,22,10,23.86,1.86,1643.52,1795.9906,152.4706
75,,14,0,0,0.0,0.0,0.0,0.0,0.0


In [63]:
model_data_1_with_preds_unique.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,94,0,0,0.0,0.0,0.0,0.0,0.0
over,76,76,38,78.64,2.64,6192.9,6105.6774,-87.2226
under,136,136,80,164.83,28.83,11344.79,13677.2342,2332.4442


In [64]:
model_data_2_with_preds_unique.groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,60,0,0,0.0,0.0,0.0,0.0,0.0
50,over,53,53,42,58.7,5.7,4199.27,4795.0203,595.7503
50,under,31,31,9,45.6,14.6,1746.38,2009.288,262.908
60,,58,0,0,0.0,0.0,0.0,0.0,0.0
60,over,47,47,35,55.23,8.23,3533.86,4324.8431,790.9831
60,under,39,39,16,55.94,16.94,2419.09,3396.7253,977.6353
70,,50,0,0,0.0,0.0,0.0,0.0,0.0
70,over,45,45,29,55.47,10.47,3154.42,3775.3085,620.8885
70,under,48,48,21,51.96,3.96,3113.07,3404.585,291.515
75,,48,0,0,0.0,0.0,0.0,0.0,0.0


In [65]:
model_data_2_with_preds_unique.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,328,0,0,0.0,0.0,0.0,0.0,0.0
over,250,250,149,288.12,38.12,16887.13,21032.5929,4145.4629
under,284,284,158,351.28,67.28,18759.17,23141.7965,4382.6265


In [66]:
model_data_3_with_preds_unique.groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,11,0,0,0.0,0.0,0.0,0.0,0.0
50,over,15,15,8,11.82,-3.18,1279.49,998.1186,-281.3714
50,under,7,7,2,10.25,3.25,376.97,1025.0,648.03
60,,11,0,0,0.0,0.0,0.0,0.0,0.0
60,over,15,15,7,11.43,-3.57,1138.24,894.1435,-244.0965
60,under,8,8,4,12.66,4.66,565.55,892.388,326.838
70,,8,0,0,0.0,0.0,0.0,0.0,0.0
70,over,12,12,5,10.76,-1.24,664.24,432.4776,-231.7624
70,under,14,14,7,17.1,3.1,1253.78,1536.5304,282.7504
75,,17,0,0,0.0,0.0,0.0,0.0,0.0


In [67]:
model_data_3_with_preds_unique.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,71,0,0,0.0,0.0,0.0,0.0,0.0
over,67,67,29,59.99,-7.01,4762.58,4280.6399,-481.9401
under,64,64,41,89.56,25.56,5183.73,7575.3333,2391.6033


In [68]:
model_data_4_with_preds_unique.groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,14,0,0,0.0,0.0,0.0,0.0,0.0
50,over,15,15,11,14.97,-0.03,1290.76,1345.9276,55.1676
50,under,10,10,3,12.85,2.85,665.16,798.81,133.65
60,,9,0,0,0.0,0.0,0.0,0.0,0.0
60,over,13,13,9,16.31,3.31,1077.79,1487.3182,409.5282
60,under,13,13,8,27.17,14.17,891.33,2119.1528,1227.8228
70,,12,0,0,0.0,0.0,0.0,0.0,0.0
70,over,11,11,4,8.26,-2.74,1012.06,826.0,-186.06
70,under,14,14,10,25.67,11.67,1066.12,1928.2188,862.0988
75,,10,0,0,0.0,0.0,0.0,0.0,0.0


In [69]:
model_data_4_with_preds_unique.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,64,0,0,0.0,0.0,0.0,0.0,0.0
over,61,61,30,56.73,-4.27,5122.51,4851.4144,-271.0956
under,102,102,70,153.34,51.34,7660.37,11843.7962,4183.4262


In [93]:
from_date = '2023-01-14 17:00'
model_data_4_with_preds_unique[model_data_4_with_preds_unique['match_date']>=from_date].groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,12,0,0,0.0,0.0,0.0,0.0,0.0
50,over,9,9,6,7.97,-1.03,690.76,645.9276,-44.8324
50,under,6,6,2,8.15,2.15,370.8,328.81,-41.99
60,,7,0,0,0.0,0.0,0.0,0.0,0.0
60,over,8,8,4,7.9,-0.1,631.77,737.5444,105.7744
60,under,9,9,6,21.05,12.05,693.28,1670.244,976.964
70,,7,0,0,0.0,0.0,0.0,0.0,0.0
70,over,6,6,2,3.86,-2.14,550.1,386.0,-164.1
70,under,11,11,8,20.37,9.37,895.21,1740.3836,845.1736
75,,8,0,0,0.0,0.0,0.0,0.0,0.0


In [94]:
model_data_4_with_preds_unique[model_data_4_with_preds_unique['match_date']>=from_date].groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,49,0,0,0.0,0.0,0.0,0.0,0.0
over,37,37,15,28.16,-8.84,2907.01,2222.7566,-684.2534
under,69,69,46,104.79,35.79,5160.33,7902.0125,2741.6825


In [95]:
min_date = '2023-01-11' # 1 is Jan 2 to Jan 5, 2 is Jan 6 to Jan 8, 3 is Jan 9 on
max_date = '2023-01-31'

mask_1 = model_data_1_with_preds_unique['match_date'].between(min_date, max_date)
mask_2 = model_data_2_with_preds_unique['match_date'].between(min_date, max_date)
mask_3 = model_data_3_with_preds_unique['match_date'].between(min_date, max_date)
mask_4 = model_data_4_with_preds_unique['match_date'].between(min_date, max_date)

bets = sum(model_data_1_with_preds_unique.loc[mask_1, 'bet']) + sum(model_data_2_with_preds_unique.loc[mask_2, 'bet']) + sum(model_data_3_with_preds_unique.loc[mask_3, 'bet']) + sum(model_data_4_with_preds_unique.loc[mask_4, 'bet'])
wins = sum(model_data_1_with_preds_unique.loc[mask_1, 'win']) + sum(model_data_2_with_preds_unique.loc[mask_2, 'win']) + sum(model_data_3_with_preds_unique.loc[mask_3, 'win']) + sum(model_data_4_with_preds_unique.loc[mask_4, 'win'])
winnings = sum(model_data_1_with_preds_unique.loc[mask_1, 'winnings']) + sum(model_data_2_with_preds_unique.loc[mask_2, 'winnings']) + sum(model_data_3_with_preds_unique.loc[mask_3, 'winnings']) + sum(model_data_4_with_preds_unique.loc[mask_4, 'winnings'])

In [96]:
print(f'Total bets {bets}, total wins {wins}, total winnings {round(winnings,2)}, profit {round(100*(winnings-bets)/bets - 1,2)}%')

Total bets 214, total wins 130, total winnings 274.86, profit 27.44%


In [97]:
expected_wins = int(wins*bets/winnings)
expected_wins

101

In [98]:
expected_p = expected_wins/bets
expected_p

0.4719626168224299

In [99]:
actual_p = wins/bets
actual_p

0.6074766355140186

In [77]:
import scipy.stats as sps
sps.binom.cdf(wins, bets, expected_p)

0.9999737422561104

#### Compare comps

In [78]:
model_data_1_with_preds_unique[model_data_1_with_preds_unique['action']!='None'].groupby('competition_name').agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum
}).sort_values('match_ref', ascending=False)

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
competition_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Spanish Copa Del Rey,49,49,29,56.78,7.78,4205.06,4656.9977,451.9377
Italian Serie A,47,47,25,42.44,-4.56,4060.62,3825.9611,-234.6589
English Premier League,33,33,20,43.15,10.15,2968.38,4019.2474,1050.8674
Greek Super League,33,33,19,43.44,10.44,2580.01,3560.942,980.932
Turkish Super Lig,26,26,16,41.37,15.37,2217.52,2865.6238,648.1038
Welsh Premier League,9,9,6,11.76,2.76,442.2,646.487,204.287
French Ligue 1,8,8,3,4.53,-3.47,572.96,207.6526,-365.3074
Scottish Premiership,4,4,0,0.0,-4.0,358.24,0.0,-358.24
Portuguese Primeira Liga,2,2,0,0.0,-2.0,117.86,0.0,-117.86
English National North,1,1,0,0.0,-1.0,14.84,0.0,-14.84


In [79]:
model_data_2_with_preds_unique[model_data_2_with_preds_unique['action']!='None'].groupby('competition_name').agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum
}).sort_values('match_ref', ascending=False)

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
competition_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Coupe de France,106,106,71,150.34,44.34,7086.16,9780.1195,2693.9595
English FA Cup,78,78,49,94.12,16.12,6008.08,7150.4629,1142.3829
Spanish La Liga,45,45,30,66.93,21.93,3696.42,5258.8858,1562.4658
Portuguese Primeira Liga,41,41,32,81.76,40.76,2882.89,5645.1826,2762.2926
English National North,36,36,12,20.94,-15.06,1090.25,852.7255,-237.5245
Italian Serie A,33,33,8,15.93,-17.07,2686.05,1352.7328,-1333.3172
Australian A-League Men,25,25,17,36.3,11.3,1784.5,2551.5262,767.0262
Greek Super League,21,21,12,25.6,4.6,1480.62,2001.3753,520.7553
Scottish Premiership,20,20,13,28.26,8.26,1344.07,2052.0129,707.9429
Sky Bet League Two,19,19,12,24.86,5.86,1106.72,1757.8587,651.1387


In [80]:
model_data_3_with_preds_unique[model_data_3_with_preds_unique['action']!='None'].groupby('competition_name').agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum
}).sort_values('match_ref', ascending=False)

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
competition_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
French Ligue 1,39,39,24,46.99,7.99,3214.69,3705.9487,491.2587
Scottish Challenge Cup,11,11,5,10.63,-0.37,883.7,960.504,76.804
English National League,10,10,5,11.3,1.3,558.77,738.3128,179.5428
Italian Serie A,10,10,1,2.48,-7.52,939.62,248.0,-691.62
English Football League Trophy,8,8,2,2.89,-5.11,434.56,62.5909,-371.9691
Turkish Super Lig,7,7,3,5.26,-1.74,600.64,398.1384,-202.5016
English League Cup,6,6,5,16.4,10.4,600.0,1640.0,1040.0
English National North,6,6,5,10.53,4.53,370.99,679.3908,308.4008
Spanish La Liga,6,6,6,14.07,8.07,434.99,1107.3274,672.3374
Welsh Premier League,6,6,1,2.72,-3.28,286.74,272.0,-14.74


In [81]:
model_data_4_with_preds_unique[model_data_4_with_preds_unique['action']!='None'].groupby('competition_name').agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum
}).sort_values('match_ref', ascending=False)

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
competition_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Australian A-League Men,21,21,17,36.64,15.64,1232.66,2108.2717,875.6117
Portuguese Primeira Liga,19,19,9,21.1,2.1,1416.1,1557.9383,141.8383
Turkish Super Lig,16,16,8,14.2,-1.8,1368.57,1413.918,45.348
English Premier League,15,15,13,23.46,8.46,1251.91,2100.4296,848.5196
Spanish La Liga,15,15,7,11.11,-3.89,1137.83,733.2198,-404.6102
French Ligue 1,13,13,10,24.39,11.39,1224.33,2376.28,1151.95
Dutch Eredivisie,10,10,4,8.84,-1.16,635.35,758.5824,123.2324
Belgian Jupiler Pro League,10,10,5,8.81,-1.19,804.74,689.0668,-115.6732
Italian Serie A,10,10,5,8.67,-1.33,1000.0,867.0,-133.0
Coppa Italia,9,9,6,15.23,6.23,748.31,1357.88,609.57


In [82]:
break

SyntaxError: 'break' outside loop (<ipython-input-82-6aaf1f276005>, line 4)

# Data issues to look into
* Duplicate matches - THIS IS BECAUSE IT RERUNS FOR ANOTHER MATCH WITHIN 1 MINUTE AND THE SAME MATCH IS STILL WITHIN THE ALLOWABLE TIME RANGE
* ODDS CHANGES A LOT IN ONE MINUTE LATE ON! - Seems like this is ok based on testing, can also focus on earlier bets if a worry
* SL and BF string matching issues
* Delay (and think about how this will affect things in practice
* Goals scored differences between data at odds time and the retrospective data collection
* Oddly large looking odds

#### Delay times

In [None]:
model_data_2_with_preds_unique['delay_time'].describe()

#### Weirdly large looking odds

In [None]:
min_date = '2023-01-11' # 1 is Jan 2 to Jan 5, 2 is Jan 6 to Jan 8, 3 is Jan 9 on
max_date = '2023-01-12'
prediction_time = 80
over_under = 'over'

mask = (
    model_data_3_with_preds_unique['match_date'].between(min_date, max_date) &
    (model_data_3_with_preds_unique['next_prediction_time']==prediction_time) &
    (model_data_3_with_preds_unique['action']==over_under)
)

model_data_3_with_preds_unique.loc[
    mask,
    ['match_date', 'team_a_name', 'team_b_name', 'betfair_name', 'team_a_score', 'team_b_score', 'runner_name_over',
     f'goals_pre_{prediction_time}m', f'goals_post_{prediction_time}m', 'outcome', f'goals_pre_{prediction_time}m_added_on_data', f'goals_post_{prediction_time}m_added_on_data',
     'clock', 'lm_odds_over', 'rf_odds_over', 'lm_odds_under', 'rf_odds_under', 'actual_odds_over_back_1', 'actual_odds_under_back_1', 'delay_time',
     'team_a_score_added_on_data', 'team_b_score_added_on_data',
     'action', 'bet', 'win', 'winnings', 'max_bet_amount', 'max_winnings', 'profit','max_profit']]

#### Check test vs actual results

In [83]:
model_data_4_with_preds['action'] = 'None'

model_data_4_with_preds.loc[
    (model_data_4_with_preds['lm_odds_over']*(1+odds_gap_min)<model_data_4_with_preds['actual_odds_over_back_1']) &
    (model_data_4_with_preds['rf_odds_over']*(1+odds_gap_min)<model_data_4_with_preds['actual_odds_over_back_1']) &
    (model_data_4_with_preds['actual_odds_over_lay_1']/model_data_4_with_preds['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_4_with_preds.loc[
    (model_data_4_with_preds['lm_odds_under']*(1+odds_gap_min)<model_data_4_with_preds['actual_odds_under_back_1']) &
    (model_data_4_with_preds['rf_odds_under']*(1+odds_gap_min)<model_data_4_with_preds['actual_odds_under_back_1']) &
    (model_data_4_with_preds['actual_odds_under_lay_1']/model_data_4_with_preds['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [84]:
# need to add outcomes
model_data_4_with_preds['outcome'] = None
for p in prediction_times:
    mask = model_data_4_with_preds['next_prediction_time']==p
    model_data_4_with_preds.loc[mask, 'outcome_orig'] = (model_data_4_with_preds.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1

model_data_4_with_preds['outcome'] = (model_data_4_with_preds['total_goals_added_on_data'] > model_data_4_with_preds['total_goals'])*1

In [85]:
model_data_4_with_preds['bet'] = 1
model_data_4_with_preds.loc[model_data_4_with_preds['action'] == 'None', 'bet'] = 0
model_data_4_with_preds['win'] = 0
model_data_4_with_preds.loc[(model_data_4_with_preds['action']=='over') & (model_data_4_with_preds['outcome']==1), 'win'] = 1
model_data_4_with_preds.loc[(model_data_4_with_preds['action']=='under') & (model_data_4_with_preds['outcome']==0), 'win'] = 1

In [86]:
model_data_4_with_preds['winnings'] = 0

over_win_mask = (model_data_4_with_preds['action']=='over') & (model_data_4_with_preds['win']==1)
model_data_4_with_preds.loc[over_win_mask, 'winnings'] = model_data_4_with_preds.loc[over_win_mask, 'actual_odds_over_back_3']

under_win_mask = (model_data_4_with_preds['action']=='under') & (model_data_4_with_preds['win']==1)
model_data_4_with_preds.loc[under_win_mask, 'winnings'] = model_data_4_with_preds.loc[under_win_mask, 'actual_odds_under_back_3']

In [87]:
model_data_4_with_preds['max_bet_amount'] = 0

under_mask = model_data_4_with_preds['action']=='under'
model_data_4_with_preds.loc[under_mask, 'max_bet_amount'] = model_data_4_with_preds.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_4_with_preds['action']=='over'
model_data_4_with_preds.loc[over_mask, 'max_bet_amount'] = model_data_4_with_preds.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_4_with_preds['max_winnings'] = model_data_4_with_preds['max_bet_amount']*model_data_4_with_preds['winnings']

model_data_4_with_preds['profit'] = model_data_4_with_preds['winnings'] - model_data_4_with_preds['bet']
model_data_4_with_preds['max_profit'] = model_data_4_with_preds['max_winnings'] - model_data_4_with_preds['max_bet_amount']

In [111]:
from_date = '2023-01-14 17:00'
model_data_4_with_preds[model_data_4_with_preds['datetime_utc']>=from_date].groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,13,0,0,0.0,0.0,0.0,0.0,0.0
50,over,7,7,4,5.4,-1.6,574.41,502.5456,-71.8644
50,under,7,7,3,12.25,5.25,486.04,702.279,216.239
60,,10,0,0,0.0,0.0,0.0,0.0,0.0
60,over,11,11,4,6.33,-4.67,931.77,580.5444,-351.2256
60,under,10,10,6,21.15,11.15,754.5,1751.892,997.392
70,,7,0,0,0.0,0.0,0.0,0.0,0.0
70,over,8,8,2,3.86,-4.14,750.1,386.0,-364.1
70,under,9,9,7,17.95,8.95,810.41,1583.5676,773.1576
75,,7,0,0,0.0,0.0,0.0,0.0,0.0


In [112]:
model_data_4_with_preds[model_data_4_with_preds['datetime_utc']>=from_date].groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,53,0,0,0.0,0.0,0.0,0.0,0.0
over,44,44,13,24.02,-19.98,3611.34,1922.3746,-1688.9654
under,66,66,46,106.38,40.38,4905.2,8042.6615,3137.4615


* 85 in betfair settled (since 2023-01-14 17:00) vs 110 bets above
* 18 in betfair cancelled
* 0 lapsed
* 7 order errors recorded in testing_live_order_results in db

In [None]:
market_id in ['1.208431813', '1.208431854', '1.208431858']

In [113]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 150
model_data_4_with_preds.loc[
    (model_data_4_with_preds['datetime_utc']>=from_date) & (model_data_4_with_preds['action']!='None')# & (model_data_4_with_preds['market_id'].isin(['1.208431813', '1.208431854', '1.208431858']))
    ,
    ['match_date', 'datetime_utc', 'betfair_id', 'team_a_name', 'team_b_name', 'betfair_name',
     'action', 'bet', 'win', 'winnings', 'profit', 'actual_odds_over_back_1', 'actual_odds_under_back_1', 'max_bet_amount', 'max_winnings','max_profit',
     'team_a_score', 'team_b_score', 'runner_name_over',
     #f'goals_pre_{prediction_time}m', f'goals_post_{prediction_time}m', 'outcome', f'goals_pre_{prediction_time}m_added_on_data', f'goals_post_{prediction_time}m_added_on_data',
     'clock', 'lm_odds_over', 'rf_odds_over', 'lm_odds_under', 'rf_odds_under', 'delay_time',
     'team_a_score_added_on_data', 'team_b_score_added_on_data',  'datetime_utc'
     ]].sort_values(['team_a_name', 'action', 'actual_odds_over_back_1'])

Unnamed: 0,match_date,datetime_utc,betfair_id,team_a_name,team_b_name,betfair_name,action,bet,win,winnings,profit,actual_odds_over_back_1,actual_odds_under_back_1,max_bet_amount,max_winnings,max_profit,team_a_score,team_b_score,runner_name_over,clock,lm_odds_over,rf_odds_over,lm_odds_under,rf_odds_under,delay_time,team_a_score_added_on_data,team_b_score_added_on_data,datetime_utc.1
226,2023-01-14T20:00:00.000Z,2023-01-14 21:25:28,32011398,Ajax,FC Twente,Ajax v FC Twente,under,1,1,3.2,2.2,1.42,3.3,100.0,320.0,220.0,0,0,Over 0.5 Goals,60',1.454283,1.539664,3.201272,2.853006,5,0,0,2023-01-14 21:25:28
238,2023-01-14T20:00:00.000Z,2023-01-14 21:35:20,32011398,Ajax,FC Twente,Ajax v FC Twente,under,1,1,2.4,1.4,1.68,2.44,100.0,240.0,140.0,0,0,Over 0.5 Goals,70',1.716358,1.850437,2.39595,2.175866,5,0,0,2023-01-14 21:35:20
251,2023-01-14T20:00:00.000Z,2023-01-14 21:45:14,32011398,Ajax,FC Twente,Ajax v FC Twente,under,1,1,1.7,0.7,2.36,1.72,100.0,170.0,70.0,0,0,Over 0.5 Goals,80',2.438426,2.622446,1.695204,1.616353,5,0,0,2023-01-14 21:45:14
143,2023-01-14T17:30:00.000Z,2023-01-14 18:39:27,31997428,Brentford,Bournemouth,Brentford v Bournemouth,over,1,1,1.36,0.36,1.38,3.6,72.46,98.5456,26.0856,1,0,Over 1.5 Goals,50',1.2836,1.303543,4.526088,4.294431,5,2,0,2023-01-14 18:39:27
145,2023-01-14T17:30:00.000Z,2023-01-14 18:40:36,31997428,Brentford,Bournemouth,Brentford v Bournemouth,over,1,1,1.37,0.37,1.39,3.5,100.0,137.0,37.0,1,0,Over 1.5 Goals,51',1.2836,1.303543,4.526088,4.294431,5,2,0,2023-01-14 18:40:36
154,2023-01-14T17:30:00.000Z,2023-01-14 18:49:19,31997428,Brentford,Bournemouth,Brentford v Bournemouth,over,1,1,1.5,0.5,1.52,2.88,100.0,150.0,50.0,1,0,Over 1.5 Goals,60',1.447056,1.494745,3.236858,3.021243,5,2,0,2023-01-14 18:49:19
157,2023-01-14T17:30:00.000Z,2023-01-14 18:50:28,31997428,Brentford,Bournemouth,Brentford v Bournemouth,over,1,1,1.58,0.58,1.6,2.6,100.0,158.0,58.0,1,0,Over 1.5 Goals,61',1.447056,1.494745,3.236858,3.021243,5,2,0,2023-01-14 18:50:28
165,2023-01-14T17:30:00.000Z,2023-01-14 18:58:58,31997428,Brentford,Bournemouth,Brentford v Bournemouth,over,1,1,1.88,0.88,1.9,2.06,100.0,188.0,88.0,1,0,Over 1.5 Goals,70',1.776154,1.805291,2.288404,2.241787,5,2,0,2023-01-14 18:58:58
177,2023-01-14T17:30:00.000Z,2023-01-14 19:08:59,31997428,Brentford,Bournemouth,Brentford v Bournemouth,over,1,0,0.0,-1.0,2.58,1.61,67.33,0.0,-67.33,2,0,Over 2.5 Goals,80',2.430508,2.536795,1.699053,1.650705,5,2,0,2023-01-14 19:08:59
170,2023-01-14T17:30:00.000Z,2023-01-14 19:04:33,31997428,Brentford,Bournemouth,Brentford v Bournemouth,under,1,0,0.0,-1.0,2.1,1.9,20.22,0.0,-20.22,1,0,Over 1.5 Goals,75',2.11162,2.151114,1.899588,1.868724,5,2,0,2023-01-14 19:04:33


In [None]:
market_datetime_counts = model_data_4_with_preds.groupby(['market_id', 'datetime_utc']).size().reset_index().rename(columns={0:'count'})

In [None]:
market_datetime_counts.groupby('count').size()

In [None]:
market_datetime_counts3 = model_data_3_with_preds.groupby(['market_id', 'datetime_utc']).size().reset_index().rename(columns={0:'count'})

In [None]:
market_datetime_counts3.groupby('count').size()

In [None]:
model_data_4_with_preds.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

In [None]:
model_data_4_with_preds[
    model_data_4_with_preds['market_id'].isin(market_datetime_counts.loc[market_datetime_counts['count']>1, 'market_id'])].groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

In [None]:
model_data_4_with_preds[
    ~model_data_4_with_preds['market_id'].isin(market_datetime_counts.loc[market_datetime_counts['count']>1, 'market_id'])].groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

In [None]:
model_data_3_with_preds['action'] = 'None'

model_data_3_with_preds.loc[
    (model_data_3_with_preds['lm_odds_over']*(1+odds_gap_min)<model_data_3_with_preds['actual_odds_over_back_1']) &
    (model_data_3_with_preds['rf_odds_over']*(1+odds_gap_min)<model_data_3_with_preds['actual_odds_over_back_1']) &
    (model_data_3_with_preds['actual_odds_over_lay_1']/model_data_3_with_preds['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_3_with_preds.loc[
    (model_data_3_with_preds['lm_odds_under']*(1+odds_gap_min)<model_data_3_with_preds['actual_odds_under_back_1']) &
    (model_data_3_with_preds['rf_odds_under']*(1+odds_gap_min)<model_data_3_with_preds['actual_odds_under_back_1']) &
    (model_data_3_with_preds['actual_odds_under_lay_1']/model_data_3_with_preds['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [None]:
# need to add outcomes
model_data_3_with_preds['outcome'] = None
for p in prediction_times:
    mask = model_data_3_with_preds['next_prediction_time']==p
    model_data_3_with_preds.loc[mask, 'outcome_orig'] = (model_data_3_with_preds.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1

model_data_3_with_preds['outcome'] = (model_data_3_with_preds['total_goals_added_on_data'] > model_data_3_with_preds['total_goals'])*1

In [None]:
model_data_3_with_preds['bet'] = 1
model_data_3_with_preds.loc[model_data_3_with_preds['action'] == 'None', 'bet'] = 0
model_data_3_with_preds['win'] = 0
model_data_3_with_preds.loc[(model_data_3_with_preds['action']=='over') & (model_data_3_with_preds['outcome']==1), 'win'] = 1
model_data_3_with_preds.loc[(model_data_3_with_preds['action']=='under') & (model_data_3_with_preds['outcome']==0), 'win'] = 1

In [None]:
model_data_3_with_preds['winnings'] = 0

over_win_mask = (model_data_3_with_preds['action']=='over') & (model_data_3_with_preds['win']==1)
model_data_3_with_preds.loc[over_win_mask, 'winnings'] = model_data_3_with_preds.loc[over_win_mask, 'actual_odds_over_back_3']

under_win_mask = (model_data_3_with_preds['action']=='under') & (model_data_3_with_preds['win']==1)
model_data_3_with_preds.loc[under_win_mask, 'winnings'] = model_data_3_with_preds.loc[under_win_mask, 'actual_odds_under_back_3']

In [None]:
model_data_3_with_preds['max_bet_amount'] = 0

under_mask = model_data_3_with_preds['action']=='under'
model_data_3_with_preds.loc[under_mask, 'max_bet_amount'] = model_data_3_with_preds.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_3_with_preds['action']=='over'
model_data_3_with_preds.loc[over_mask, 'max_bet_amount'] = model_data_3_with_preds.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_3_with_preds['max_winnings'] = model_data_3_with_preds['max_bet_amount']*model_data_3_with_preds['winnings']

model_data_3_with_preds['profit'] = model_data_3_with_preds['winnings'] - model_data_3_with_preds['bet']
model_data_3_with_preds['max_profit'] = model_data_3_with_preds['max_winnings'] - model_data_3_with_preds['max_bet_amount']

In [None]:
model_data_3_with_preds.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

In [None]:
model_data_3_with_preds[
    model_data_3_with_preds['market_id'].isin(market_datetime_counts3.loc[market_datetime_counts3['count']>1, 'market_id'])].groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

In [None]:
model_data_3_with_preds[
    ~model_data_3_with_preds['market_id'].isin(market_datetime_counts3.loc[market_datetime_counts3['count']>1, 'market_id'])].groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})