# Get Previous Outcomes From Sporting Life

In [1]:
import requests
import urllib
from bs4 import BeautifulSoup
import json
import pandas as pd
import numpy as np
import pickle
from tqdm import tqdm_notebook, tqdm
import datetime
import time
import importlib
import config
importlib.reload(config)
from config import username, password, application, dbpw
import logging

import matplotlib.pyplot as plt
import seaborn as sns

import pymysql
import sqlalchemy

import xgboost as xgb
import statsmodels.api as sm

from Levenshtein import distance as levenshtein_distance

In [2]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
pd.options.mode.chained_assignment = None

# Get Data For Dates

In [4]:
past_dates = pd.date_range(start='2023-01-02', end='2023-02-04')

In [5]:
matches_data = []
goals_data = []
failed_dates = []

In [6]:
for d in tqdm(past_dates):
    try:
        yyyymmdd = d.strftime('%Y')+'-'+d.strftime('%m')+'-'+d.strftime('%d')
        dateurl = 'https://www.sportinglife.com/football/fixtures-results/'+yyyymmdd
        datejson = urllib.request.urlopen(dateurl).read()
        soup = BeautifulSoup(datejson)
        soup_find = soup.body.find(attrs={"type": "application/json"})
        soup_json = json.loads(soup_find.text)
        
        matches = soup_json.get('props', {}).get('pageProps', {}).get('matches', [])
        for m in matches:
            match_ref = m.get('match_reference', {}).get('id')
            match_date = m.get('match_date')
            match_time = m.get('match_time')

            matches_data.append(
                [
                    match_ref, match_date, match_time,
                    m.get('state'),
                    m.get('match_type'), 
                    m.get('competition', {}).get('competition_reference', {}).get('id'),
                    m.get('competition', {}).get('name'),
                    str(m.get('round')),
                    m.get('legs'),
                    m.get('leg'),
                    m.get('team_score_a', {}).get('team', {}).get('team_reference', {}).get('id'),
                    m.get('team_score_a', {}).get('team', {}).get('name'),
                    m.get('team_score_a', {}).get('team', {}).get('short_name'),
                    m.get('team_score_a', {}).get('score', [])[0].get('score'),
                    m.get('team_score_b', {}).get('team', {}).get('team_reference', {}).get('id'),
                    m.get('team_score_b', {}).get('team', {}).get('name'),
                    m.get('team_score_b', {}).get('team', {}).get('short_name'),
                    m.get('team_score_b', {}).get('score', [])[0].get('score'),
                    m.get('match_outcome', {}).get('outcome'),
                    m.get('match_outcome', {}).get('result_type'),
                    m.get('match_outcome', {}).get('winner', {}).get('team_reference', {}).get('id'),
                    m.get('match_outcome', {}).get('winner', {}).get('name'),
                    m.get('match_outcome', {}).get('winner', {}).get('short_name'),
                    m.get('half_time_score', {}).get('home'),
                    m.get('half_time_score', {}).get('away'),
                    m.get('full_time_score', {}).get('home'),
                    m.get('full_time_score', {}).get('away'),
                    m.get('clock')
                ]
            )

            homegoals = m.get('homeGoals', [])
            for g in homegoals:
                player = g.get('team_player')
                goal_id = g.get('id')
                for goal in g.get('goal', []):
                    goals_data.append(
                        [
                            match_ref, match_date, match_time,
                            player, goal_id,
                            goal.get('type'),
                            goal.get('time'),
                            goal.get('event_id'),
                            goal.get('event_time'),
                            'home'
                        ]
                    )

            awaygoals = m.get('awayGoals', [])
            for g in awaygoals:
                player = g.get('team_player')
                goal_id = g.get('id')
                for goal in g.get('goal', []):
                    goals_data.append(
                        [
                            match_ref, match_date, match_time,
                            player, goal_id,
                            goal.get('type'),
                            goal.get('time'),
                            goal.get('event_id'),
                            goal.get('event_time'),
                            'away'
                        ]
                    )
        
    except:
        failed_dates.append(d)

100%|██████████| 34/34 [00:44<00:00,  1.31s/it]


In [7]:
matches_cols = [
    'match_ref', 'match_date', 'match_time', 'state', 'match_type', 'competition_id', 'competition_name', 
    'round', 'legs', 'leg', 'team_a_id', 'team_a_name', 'team_a_short_name', 'team_a_score',
    'team_b_id', 'team_b_name', 'team_b_short_name', 'team_b_score',
    'outcome', 'result_type', 'winner_id', 'winner_name', 'winner_short_name',
    'half_time_score_home', 'half_time_score_away',
    'full_time_score_home', 'full_time_score_away', 'clock'
]
matches_df = pd.DataFrame(matches_data, columns=matches_cols)

In [8]:
goals_cols = [
    'match_ref', 'match_date', 'match_time', 'player', 'goal_id', 'type', 'time', 'event_id', 'event_time', 'side'
]
goals_df = pd.DataFrame(goals_data, columns=goals_cols)

In [9]:
def get_extra_time(t):
    time_split = t.replace("'", "").split("+")
    if len(time_split)>1:
        return int(time_split[1])
    else:
        return 0

goals_df['time_regular'] = goals_df['time'].apply(lambda x: int(x.replace("'", "").split("+")[0]))
goals_df['time_extra'] = goals_df['time'].apply(get_extra_time)

In [10]:
matches_df['year'] = matches_df['match_date'].apply(lambda x: x[:4])

In [11]:
time_cutoffs = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90]
cap_at = 90
time_features = []
for i, t in enumerate(time_cutoffs):
    goals_df[f'goals_pre_{t}m'] = ((goals_df[f'time_regular']<t) & (goals_df[f'time_regular']<=cap_at))*1
    time_features += [f'goals_pre_{t}m']
    if t > min(time_cutoffs):
        goals_df[f'goals_{time_cutoffs[i-1]}m_to_{t}m'] = (
            goals_df[f'goals_pre_{t}m'] - goals_df[f'goals_pre_{time_cutoffs[i-1]}m'])
        time_features += [f'goals_{time_cutoffs[i-1]}m_to_{t}m']
    
    goals_df[f'goals_post_{t}m'] = ((goals_df[f'time_regular']>=t) & (goals_df[f'time_regular']<=cap_at))*1
    time_features += [f'goals_post_{t}m']

# same for home and away goals
time_features_home_away = []
for i, t in enumerate(time_cutoffs):
    goals_df[f'home_goals_pre_{t}m'] = ((goals_df[f'time_regular']<t) & (goals_df[f'time_regular']<=cap_at) & (goals_df[f'side']=='home'))*1
    time_features_home_away += [f'home_goals_pre_{t}m']
    
    goals_df[f'away_goals_pre_{t}m'] = ((goals_df[f'time_regular']<t) & (goals_df[f'time_regular']<=cap_at) & (goals_df[f'side']=='away'))*1
    time_features_home_away += [f'away_goals_pre_{t}m']
    
    if t > min(time_cutoffs):
        goals_df[f'home_goals_{time_cutoffs[i-1]}m_to_{t}m'] = (
            goals_df[f'home_goals_pre_{t}m'] - goals_df[f'home_goals_pre_{time_cutoffs[i-1]}m'])
        time_features_home_away += [f'home_goals_{time_cutoffs[i-1]}m_to_{t}m']
        
        goals_df[f'away_goals_{time_cutoffs[i-1]}m_to_{t}m'] = (
            goals_df[f'away_goals_pre_{t}m'] - goals_df[f'away_goals_pre_{time_cutoffs[i-1]}m'])
        time_features_home_away += [f'away_goals_{time_cutoffs[i-1]}m_to_{t}m']
    
    goals_df[f'home_goals_post_{t}m'] = ((goals_df[f'time_regular']>=t) & (goals_df[f'time_regular']<=cap_at) & (goals_df[f'side']=='home'))*1
    time_features_home_away += [f'home_goals_post_{t}m']
    
    goals_df[f'away_goals_post_{t}m'] = ((goals_df[f'time_regular']>=t) & (goals_df[f'time_regular']<=cap_at) & (goals_df[f'side']=='away'))*1
    time_features_home_away += [f'away_goals_post_{t}m']

In [12]:
goals_features = goals_df.groupby(['match_ref', 'match_date'])[time_features+time_features_home_away].sum().reset_index()

In [13]:
model_data = matches_df.merge(goals_features, how='left', on=['match_ref', 'match_date'])

for f in time_features + time_features_home_away:
    model_data[f] = model_data[f].fillna(0)
    
for t in time_cutoffs:
    model_data[f'goal_diff_at_{t}'] = model_data[f'home_goals_pre_{t}m'] - model_data[f'away_goals_pre_{t}m']
    model_data[f'abs_goal_diff_at_{t}'] = abs(model_data[f'goal_diff_at_{t}'])

In [14]:
# check only include games with no goals data if score is 0 - 0
model_data['goals_data_num_goals'] = model_data['goals_pre_90m'] + model_data['goals_post_90m']
model_data['matches_data_num_goals'] = model_data['team_a_score'] + model_data['team_b_score']
et_mask = model_data['result_type']=='AFTEREXTRATIME'
model_data.loc[et_mask, 'matches_data_num_goals'] = model_data.loc[et_mask, 'full_time_score_home'] + model_data.loc[et_mask, 'full_time_score_away']

In [15]:
model_data_no_goals_data = model_data[model_data['goals_data_num_goals'] != model_data['matches_data_num_goals']]
model_data = model_data[model_data['goals_data_num_goals'] == model_data['matches_data_num_goals']]

In [16]:
model_data = model_data[model_data['half_time_score_home'].notnull()]

In [17]:
model_data['any_goal_post_50'] = (model_data['goals_post_50m']>0)*1
model_data['any_goal_post_60'] = (model_data['goals_post_60m']>0)*1
model_data['any_goal_post_70'] = (model_data['goals_post_70m']>0)*1
model_data['any_goal_post_75'] = (model_data['goals_post_75m']>0)*1
model_data['any_goal_post_80'] = (model_data['goals_post_80m']>0)*1
model_data['any_goal_post_85'] = (model_data['goals_post_85m']>0)*1

In [18]:
model_data['total_goals'] = model_data['goals_data_num_goals']
model_data['score_diff'] = model_data['team_a_score'] - model_data['team_b_score']
model_data['score_diff_abs'] = abs(model_data['score_diff'])

In [19]:
model_data['year'] = model_data['match_date'].apply(lambda x: x[:4]).astype(int)
model_data['month'] = model_data['match_date'].apply(lambda x: x[5:7]).astype(int)

In [20]:
model_data.shape

(1197, 236)

# Get Data from DB

In [21]:
connect_string = 'mysql+pymysql://root:'+dbpw+'@localhost/sl_bf_late_goals'
sql_engine = sqlalchemy.create_engine(connect_string)
testing_viable_matches = pd.read_sql('''SELECT * FROM testing_viable_matches''', con=sql_engine)
testing2_viable_matches = pd.read_sql('''SELECT * FROM testing2_viable_matches''', con=sql_engine)
testing_model_data = pd.read_sql('''SELECT * FROM testing_model_data''', con=sql_engine)
testing2_model_data = pd.read_sql('''SELECT * FROM testing2_model_data''', con=sql_engine)

In [22]:
testing_viable_matches.shape

(461, 78)

In [23]:
testing2_viable_matches.shape

(1299, 78)

In [24]:
select_cols1 = f'select v.{testing_viable_matches.columns[0]}'
for c in testing_viable_matches.columns[1:]:
    select_cols1 += f', v.{c}'
for c in testing_model_data.columns:
    if c not in testing_viable_matches.columns:
        select_cols1 += f', m.{c}'
        
select_cols2 = f'select v.{testing2_viable_matches.columns[0]}'
for c in testing2_viable_matches.columns[1:]:
    select_cols2 += f', v.{c}'
for c in testing2_model_data.columns:
    if c not in testing2_viable_matches.columns:
        select_cols2 += f', m.{c}'

In [25]:
viable_matches_with_model_data = pd.read_sql(f'''{select_cols1} FROM testing_viable_matches v LEFT JOIN testing_model_data m ON v.match_ref = m.match_ref and v.datetime_utc = m.datetime_utc''', con=sql_engine)
viable_matches_with_model_data2 = pd.read_sql(f'''{select_cols2} FROM testing2_viable_matches v LEFT JOIN testing2_model_data m ON v.match_ref = m.match_ref and v.datetime_utc = m.datetime_utc''', con=sql_engine)
viable_matches_with_model_data3 = pd.read_sql(f'''SELECT * FROM testing3_model_data_with_preds''', con=sql_engine)
viable_matches_with_model_data4 = pd.read_sql(f'''SELECT * FROM testing_live_model_data_with_preds''', con=sql_engine)

In [26]:
viable_matches_with_model_data.shape

(461, 131)

In [27]:
viable_matches_with_model_data2.shape

(1299, 277)

In [28]:
viable_matches_with_model_data3.shape

(360, 283)

In [29]:
viable_matches_with_model_data3.shape

(360, 283)

In [30]:
# add on the extra features needed for the original v1 data collected
viable_matches_with_model_data1 = viable_matches_with_model_data.merge(model_data, how='left', on='match_ref', suffixes=('', '_added_on_data'))
viable_matches_with_model_data2 = viable_matches_with_model_data2.merge(model_data, how='left', on='match_ref', suffixes=('', '_added_on_data'))
viable_matches_with_model_data3 = viable_matches_with_model_data3.merge(model_data, how='left', on='match_ref', suffixes=('', '_added_on_data'))
viable_matches_with_model_data4 = viable_matches_with_model_data4.merge(model_data, how='left', on='match_ref', suffixes=('', '_added_on_data'))

In [31]:
viable_matches_with_model_data4[viable_matches_with_model_data4['team_a_name']=='Rangers']

Unnamed: 0,match_ref,match_date,match_time,state,match_type,competition_id,competition_name,round,legs,leg,...,any_goal_post_50,any_goal_post_60,any_goal_post_70,any_goal_post_75,any_goal_post_80,any_goal_post_85,total_goals_added_on_data,score_diff,score_diff_abs,month_added_on_data
382,157364,2023-01-15T15:00:00.000Z,15:00,SECONDHALF,Cup,42,Scottish League Cup,"{'round_number': 4, 'round_type': 'Semi-Finals'}",1,1,...,,,,,,,,,,
385,157364,2023-01-15T15:00:00.000Z,15:00,SECONDHALF,Cup,42,Scottish League Cup,"{'round_number': 4, 'round_type': 'Semi-Finals'}",1,1,...,,,,,,,,,,
389,157364,2023-01-15T15:00:00.000Z,15:00,SECONDHALF,Cup,42,Scottish League Cup,"{'round_number': 4, 'round_type': 'Semi-Finals'}",1,1,...,,,,,,,,,,
393,157364,2023-01-15T15:00:00.000Z,15:00,SECONDHALF,Cup,42,Scottish League Cup,"{'round_number': 4, 'round_type': 'Semi-Finals'}",1,1,...,,,,,,,,,,
399,157364,2023-01-15T15:00:00.000Z,15:00,SECONDHALF,Cup,42,Scottish League Cup,"{'round_number': 4, 'round_type': 'Semi-Finals'}",1,1,...,,,,,,,,,,
402,157364,2023-01-15T15:00:00.000Z,15:00,SECONDHALF,Cup,42,Scottish League Cup,"{'round_number': 4, 'round_type': 'Semi-Finals'}",1,1,...,,,,,,,,,,
1808,145241,2023-01-28T15:00:00.000Z,15:00,SECONDHALF,Regular,62,Scottish Premiership,{},1,1,...,1.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,1.0
1865,145241,2023-01-28T15:00:00.000Z,15:00,SECONDHALF,Regular,62,Scottish Premiership,{},1,1,...,1.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,1.0
2039,145241,2023-01-28T15:00:00.000Z,15:00,SECONDHALF,Regular,62,Scottish Premiership,{},1,1,...,1.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,1.0
2081,145241,2023-01-28T15:00:00.000Z,15:00,SECONDHALF,Regular,62,Scottish Premiership,{},1,1,...,1.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,1.0


In [32]:
goals_df[goals_df['match_ref']==157364]

Unnamed: 0,match_ref,match_date,match_time,player,goal_id,type,time,event_id,event_time,side,...,home_goals_80m_to_85m,away_goals_80m_to_85m,home_goals_post_85m,away_goals_post_85m,home_goals_pre_90m,away_goals_pre_90m,home_goals_85m_to_90m,away_goals_85m_to_90m,home_goals_post_90m,away_goals_post_90m


In [33]:
model_data[model_data['match_ref']==157364]

Unnamed: 0,match_ref,match_date,match_time,state,match_type,competition_id,competition_name,round,legs,leg,...,any_goal_post_50,any_goal_post_60,any_goal_post_70,any_goal_post_75,any_goal_post_80,any_goal_post_85,total_goals,score_diff,score_diff_abs,month


# Get models and do predictions

In [34]:
# # football models
# with open('/home/angus/projects/betting/football/models/late_goals_test_model_1.pickle', 'rb') as f:
#     test_model_1_80m = pickle.load(f)
with open('/home/angus/projects/betting/football/models/late_goals_test_models_2.pickle', 'rb') as f:
    models_dicts = pickle.load(f)

In [35]:
models_dicts

{'any_goal_post_50': {'lin_mod': <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7fab449bc748>,
  'rf_mod': RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                         max_depth=6, max_features='auto', max_leaf_nodes=None,
                         min_impurity_decrease=0.0, min_impurity_split=None,
                         min_samples_leaf=2, min_samples_split=2,
                         min_weight_fraction_leaf=0.0, n_estimators=16,
                         n_jobs=None, oob_score=False, random_state=None,
                         verbose=0, warm_start=False),
  'xgb_mod': XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
                colsample_bynode=1, colsample_bytree=1, gamma=0,
                learning_rate=0.1, max_delta_step=0, max_depth=2,
                min_child_weight=1, missing=nan, n_estimators=4, n_jobs=1,
                nthread=None, objective='binary:logistic', random_state=0,
          

In [36]:
prediction_times = [50, 60, 70, 75, 80, 85]
model_data_1_with_preds = []
model_data_2_with_preds = []
for p in prediction_times:
    model_data_1_sub = viable_matches_with_model_data1[viable_matches_with_model_data1['next_prediction_time']==p]
    model_data_2_sub = viable_matches_with_model_data2[viable_matches_with_model_data2['next_prediction_time']==p]

    train_rc_comp = models_dicts[f'any_goal_post_{p}']['train_rc_comp']

    model_data_1_sub['competition_name_rc'] = model_data_1_sub['competition_name']
    model_data_1_sub.loc[~model_data_1_sub['competition_name'].isin(train_rc_comp['competition_name_rc']), 'competition_name_rc'] = 'Other'
    model_data_1_sub = model_data_1_sub.merge(train_rc_comp, how='left', on='competition_name_rc')

    model_data_2_sub['competition_name_rc'] = model_data_2_sub['competition_name']
    model_data_2_sub.loc[~model_data_2_sub['competition_name'].isin(train_rc_comp['competition_name_rc']), 'competition_name_rc'] = 'Other'
    model_data_2_sub = model_data_2_sub.merge(train_rc_comp, how='left', on='competition_name_rc')
    
    model_data_1_sub = model_data_1_sub[model_data_1_sub[models_dicts[f'any_goal_post_{p}']['features']].isnull().sum(axis=1)==0]
    model_data_2_sub = model_data_2_sub[model_data_2_sub[models_dicts[f'any_goal_post_{p}']['features']].isnull().sum(axis=1)==0]
    
    if len(model_data_1_sub) > 0:
        model_data_1_sub['lm_preds'] = models_dicts[f'any_goal_post_{p}']['lin_mod'].predict(sm.add_constant(model_data_1_sub[models_dicts[f'any_goal_post_{p}']['features']], has_constant='add'))
        model_data_1_sub['rf_preds'] = models_dicts[f'any_goal_post_{p}']['rf_mod'].predict_proba(model_data_1_sub[models_dicts[f'any_goal_post_{p}']['features']])[:, 1]
        model_data_1_sub['xgb_preds'] = models_dicts[f'any_goal_post_{p}']['xgb_mod'].predict_proba(model_data_1_sub[models_dicts[f'any_goal_post_{p}']['features']])[:, 1]
    
    if len(model_data_2_sub) > 0:
        model_data_2_sub['lm_preds'] = models_dicts[f'any_goal_post_{p}']['lin_mod'].predict(sm.add_constant(model_data_2_sub[models_dicts[f'any_goal_post_{p}']['features']], has_constant='add'))
        model_data_2_sub['rf_preds'] = models_dicts[f'any_goal_post_{p}']['rf_mod'].predict_proba(model_data_2_sub[models_dicts[f'any_goal_post_{p}']['features']])[:, 1]
        model_data_2_sub['xgb_preds'] = models_dicts[f'any_goal_post_{p}']['xgb_mod'].predict_proba(model_data_2_sub[models_dicts[f'any_goal_post_{p}']['features']])[:, 1]
    
    model_data_1_with_preds.append(model_data_1_sub)
    model_data_2_with_preds.append(model_data_2_sub)
    
model_data_1_with_preds = pd.concat(model_data_1_with_preds, axis=0)
model_data_2_with_preds = pd.concat(model_data_2_with_preds, axis=0)

  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)
  return ptp(axis=axis, out=out, **kwargs)


In [37]:
model_data_1_with_preds['lm_odds_over'] = 1/model_data_1_with_preds['lm_preds']
model_data_1_with_preds['rf_odds_over'] = 1/model_data_1_with_preds['rf_preds']
model_data_1_with_preds['xgb_odds_over'] = 1/model_data_1_with_preds['xgb_preds']

In [38]:
model_data_2_with_preds['lm_odds_over'] = 1/model_data_2_with_preds['lm_preds']
model_data_2_with_preds['rf_odds_over'] = 1/model_data_2_with_preds['rf_preds']
model_data_2_with_preds['xgb_odds_over'] = 1/model_data_2_with_preds['xgb_preds']

In [39]:
model_data_1_with_preds['lm_odds_under'] = 1/(1-model_data_1_with_preds['lm_preds'])
model_data_1_with_preds['rf_odds_under'] = 1/(1-model_data_1_with_preds['rf_preds'])
model_data_1_with_preds['xgb_odds_under'] = 1/(1-model_data_1_with_preds['xgb_preds'])

In [40]:
model_data_2_with_preds['lm_odds_under'] = 1/(1-model_data_2_with_preds['lm_preds'])
model_data_2_with_preds['rf_odds_under'] = 1/(1-model_data_2_with_preds['rf_preds'])
model_data_2_with_preds['xgb_odds_under'] = 1/(1-model_data_2_with_preds['xgb_preds'])

In [41]:
model_data_3_with_preds = viable_matches_with_model_data3.copy()

model_data_4_with_preds = viable_matches_with_model_data4.copy()

In [42]:
model_data_1_with_preds.shape

(461, 377)

In [43]:
model_data_1_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1).shape

(324, 377)

In [44]:
# remove duplicate market_id and datetime_utc to remove error where placing wrong bets
remove_duplicates = True
if remove_duplicates:
    market_datetime_counts1 = model_data_1_with_preds.groupby(['market_id', 'datetime_utc']).size().reset_index().rename(columns={0:'count'})
    market_datetime_counts2 = model_data_2_with_preds.groupby(['market_id', 'datetime_utc']).size().reset_index().rename(columns={0:'count'})
    market_datetime_counts3 = model_data_3_with_preds.groupby(['market_id', 'datetime_utc']).size().reset_index().rename(columns={0:'count'})
    market_datetime_counts4 = model_data_4_with_preds.groupby(['market_id', 'datetime_utc']).size().reset_index().rename(columns={0:'count'})
    
    model_data_1_with_preds = model_data_1_with_preds[
        ~model_data_1_with_preds['market_id'].isin(market_datetime_counts1.loc[market_datetime_counts1['count']>1, 'market_id'])]
    model_data_2_with_preds = model_data_2_with_preds[
        ~model_data_2_with_preds['market_id'].isin(market_datetime_counts2.loc[market_datetime_counts2['count']>1, 'market_id'])]
    model_data_3_with_preds = model_data_3_with_preds[
        ~model_data_3_with_preds['market_id'].isin(market_datetime_counts3.loc[market_datetime_counts3['count']>1, 'market_id'])]
    model_data_4_with_preds = model_data_4_with_preds[
        ~model_data_4_with_preds['market_id'].isin(market_datetime_counts4.loc[market_datetime_counts4['count']>1, 'market_id'])]
    
    print(f"""Removed {sum(market_datetime_counts1['count']>1)}, {sum(market_datetime_counts2['count']>1)},\
          {sum(market_datetime_counts3['count']>1)}, {sum(market_datetime_counts4['count']>1)}""")

Removed 0, 0,          35, 12


In [45]:
first_or_last = 'first'

if first_or_last == 'first':
    
    model_data_1_with_preds_unique = model_data_1_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1)
    model_data_2_with_preds_unique = model_data_2_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1)
    model_data_2_with_preds_unique = model_data_2_with_preds_unique[model_data_2_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]

    model_data_3_with_preds_unique = model_data_3_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1)
    model_data_3_with_preds_unique = model_data_3_with_preds_unique[model_data_3_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]
    
    model_data_4_with_preds_unique = model_data_4_with_preds.groupby(['match_ref', 'next_prediction_time']).head(1)
    model_data_4_with_preds_unique = model_data_4_with_preds_unique[model_data_4_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]

else:
    model_data_1_with_preds_unique = model_data_1_with_preds.groupby(['match_ref', 'next_prediction_time']).tail(1)
    model_data_2_with_preds_unique = model_data_2_with_preds.groupby(['match_ref', 'next_prediction_time']).tail(1)
    model_data_2_with_preds_unique = model_data_2_with_preds_unique[model_data_2_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]

    model_data_3_with_preds_unique = model_data_3_with_preds.groupby(['match_ref', 'next_prediction_time']).tail(1)
    model_data_3_with_preds_unique = model_data_3_with_preds_unique[model_data_3_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]
    
    model_data_4_with_preds_unique = model_data_4_with_preds.groupby(['match_ref', 'next_prediction_time']).tail(1)
    model_data_4_with_preds_unique = model_data_4_with_preds_unique[model_data_4_with_preds_unique[f'goals_post_50m_added_on_data'].notnull()]

In [46]:
model_data_3_with_preds_unique.shape

(148, 518)

In [47]:
model_data_3_with_preds_unique.shape

(148, 518)

In [101]:
back_lay_max_pc = 0.1
odds_gap_min = 0.0

model_data_1_with_preds_unique['action'] = 'None'

model_data_1_with_preds_unique.loc[
    (model_data_1_with_preds_unique['lm_odds_over']*(1+odds_gap_min)<model_data_1_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_1_with_preds_unique['rf_odds_over']*(1+odds_gap_min)<model_data_1_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_1_with_preds_unique['actual_odds_over_lay_1']/model_data_1_with_preds_unique['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_1_with_preds_unique.loc[
    (model_data_1_with_preds_unique['lm_odds_under']*(1+odds_gap_min)<model_data_1_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_1_with_preds_unique['rf_odds_under']*(1+odds_gap_min)<model_data_1_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_1_with_preds_unique['actual_odds_under_lay_1']/model_data_1_with_preds_unique['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [102]:
model_data_2_with_preds_unique['action'] = 'None'

model_data_2_with_preds_unique.loc[
    (model_data_2_with_preds_unique['lm_odds_over']*(1+odds_gap_min)<model_data_2_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_2_with_preds_unique['rf_odds_over']*(1+odds_gap_min)<model_data_2_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_2_with_preds_unique['actual_odds_over_lay_1']/model_data_2_with_preds_unique['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_2_with_preds_unique.loc[
    (model_data_2_with_preds_unique['lm_odds_under']*(1+odds_gap_min)<model_data_2_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_2_with_preds_unique['rf_odds_under']*(1+odds_gap_min)<model_data_2_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_2_with_preds_unique['actual_odds_under_lay_1']/model_data_2_with_preds_unique['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [103]:
model_data_3_with_preds_unique['action'] = 'None'

model_data_3_with_preds_unique.loc[
    (model_data_3_with_preds_unique['lm_odds_over']*(1+odds_gap_min)<model_data_3_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_3_with_preds_unique['rf_odds_over']*(1+odds_gap_min)<model_data_3_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_3_with_preds_unique['actual_odds_over_lay_1']/model_data_3_with_preds_unique['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_3_with_preds_unique.loc[
    (model_data_3_with_preds_unique['lm_odds_under']*(1+odds_gap_min)<model_data_3_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_3_with_preds_unique['rf_odds_under']*(1+odds_gap_min)<model_data_3_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_3_with_preds_unique['actual_odds_under_lay_1']/model_data_3_with_preds_unique['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [104]:
model_data_4_with_preds_unique['action'] = 'None'

model_data_4_with_preds_unique.loc[
    (model_data_4_with_preds_unique['lm_odds_over']*(1+odds_gap_min)<model_data_4_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_4_with_preds_unique['rf_odds_over']*(1+odds_gap_min)<model_data_4_with_preds_unique['actual_odds_over_back_1']) &
    (model_data_4_with_preds_unique['actual_odds_over_lay_1']/model_data_4_with_preds_unique['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_4_with_preds_unique.loc[
    (model_data_4_with_preds_unique['lm_odds_under']*(1+odds_gap_min)<model_data_4_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_4_with_preds_unique['rf_odds_under']*(1+odds_gap_min)<model_data_4_with_preds_unique['actual_odds_under_back_1']) &
    (model_data_4_with_preds_unique['actual_odds_under_lay_1']/model_data_4_with_preds_unique['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [105]:
# need to add outcomes
model_data_1_with_preds_unique['outcome'] = None
model_data_2_with_preds_unique['outcome'] = None
model_data_3_with_preds_unique['outcome'] = None
model_data_4_with_preds_unique['outcome'] = None
for p in prediction_times:
    mask = model_data_1_with_preds_unique['next_prediction_time']==p
    model_data_1_with_preds_unique.loc[mask, 'outcome_orig'] = (model_data_1_with_preds_unique.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1
    
    mask = model_data_2_with_preds_unique['next_prediction_time']==p
    model_data_2_with_preds_unique.loc[mask, 'outcome_orig'] = (model_data_2_with_preds_unique.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1
    
    mask = model_data_3_with_preds_unique['next_prediction_time']==p
    model_data_3_with_preds_unique.loc[mask, 'outcome_orig'] = (model_data_3_with_preds_unique.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1
    
    mask = model_data_4_with_preds_unique['next_prediction_time']==p
    model_data_4_with_preds_unique.loc[mask, 'outcome_orig'] = (model_data_4_with_preds_unique.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1


model_data_1_with_preds_unique['outcome'] = (model_data_1_with_preds_unique['total_goals_added_on_data'] > model_data_1_with_preds_unique['total_goals'])*1
model_data_2_with_preds_unique['outcome'] = (model_data_2_with_preds_unique['total_goals_added_on_data'] > model_data_2_with_preds_unique['total_goals'])*1
model_data_3_with_preds_unique['outcome'] = (model_data_3_with_preds_unique['total_goals_added_on_data'] > model_data_3_with_preds_unique['total_goals'])*1
model_data_4_with_preds_unique['outcome'] = (model_data_4_with_preds_unique['total_goals_added_on_data'] > model_data_4_with_preds_unique['total_goals'])*1

In [106]:
# sum(model_data_1_with_preds_unique['outcome']!=model_data_1_with_preds_unique['outcome_orig'])/len(model_data_1_with_preds_unique)
sum(model_data_2_with_preds_unique['outcome']!=model_data_2_with_preds_unique['outcome_orig'])/len(model_data_2_with_preds_unique)
# sum(model_data_3_with_preds_unique['outcome']!=model_data_3_with_preds_unique['outcome_orig'])/len(model_data_3_with_preds_unique)

0.013921113689095127

In [187]:
model_data_1_with_preds_unique['bet'] = 1
model_data_1_with_preds_unique.loc[model_data_1_with_preds_unique['action'] == 'None', 'bet'] = 0
model_data_1_with_preds_unique['win'] = 0
model_data_1_with_preds_unique.loc[(model_data_1_with_preds_unique['action']=='over') & (model_data_1_with_preds_unique['outcome']==1), 'win'] = 1
model_data_1_with_preds_unique.loc[(model_data_1_with_preds_unique['action']=='under') & (model_data_1_with_preds_unique['outcome']==0), 'win'] = 1

In [188]:
model_data_1_with_preds_unique['bf_odds_to_use_over'] = model_data_1_with_preds_unique['actual_odds_over_back_3']
model_data_1_with_preds_unique.loc[model_data_1_with_preds_unique['bf_odds_to_use_over'].isnull(), 'bf_odds_to_use_over'] = model_data_1_with_preds_unique.loc[
    model_data_1_with_preds_unique['bf_odds_to_use_over'].isnull(), 'actual_odds_over_back_2']
model_data_1_with_preds_unique.loc[model_data_1_with_preds_unique['bf_odds_to_use_over'].isnull(), 'bf_odds_to_use_over'] = model_data_1_with_preds_unique.loc[
    model_data_1_with_preds_unique['bf_odds_to_use_over'].isnull(), 'actual_odds_over_back_1']

model_data_1_with_preds_unique['bf_odds_to_use_under'] = model_data_1_with_preds_unique['actual_odds_under_back_3']
model_data_1_with_preds_unique.loc[model_data_1_with_preds_unique['bf_odds_to_use_under'].isnull(), 'bf_odds_to_use_under'] = model_data_1_with_preds_unique.loc[
    model_data_1_with_preds_unique['bf_odds_to_use_under'].isnull(), 'actual_odds_under_back_2']
model_data_1_with_preds_unique.loc[model_data_1_with_preds_unique['bf_odds_to_use_under'].isnull(), 'bf_odds_to_use_under'] = model_data_1_with_preds_unique.loc[
    model_data_1_with_preds_unique['bf_odds_to_use_under'].isnull(), 'actual_odds_under_back_1']

In [189]:
model_data_2_with_preds_unique['bf_odds_to_use_over'] = model_data_2_with_preds_unique['actual_odds_over_back_3']
model_data_2_with_preds_unique.loc[model_data_2_with_preds_unique['bf_odds_to_use_over'].isnull(), 'bf_odds_to_use_over'] = model_data_2_with_preds_unique.loc[
    model_data_2_with_preds_unique['bf_odds_to_use_over'].isnull(), 'actual_odds_over_back_2']
model_data_2_with_preds_unique.loc[model_data_2_with_preds_unique['bf_odds_to_use_over'].isnull(), 'bf_odds_to_use_over'] = model_data_2_with_preds_unique.loc[
    model_data_2_with_preds_unique['bf_odds_to_use_over'].isnull(), 'actual_odds_over_back_1']

model_data_2_with_preds_unique['bf_odds_to_use_under'] = model_data_2_with_preds_unique['actual_odds_under_back_3']
model_data_2_with_preds_unique.loc[model_data_2_with_preds_unique['bf_odds_to_use_under'].isnull(), 'bf_odds_to_use_under'] = model_data_2_with_preds_unique.loc[
    model_data_2_with_preds_unique['bf_odds_to_use_under'].isnull(), 'actual_odds_under_back_2']
model_data_2_with_preds_unique.loc[model_data_2_with_preds_unique['bf_odds_to_use_under'].isnull(), 'bf_odds_to_use_under'] = model_data_2_with_preds_unique.loc[
    model_data_2_with_preds_unique['bf_odds_to_use_under'].isnull(), 'actual_odds_under_back_1']

In [190]:
model_data_3_with_preds_unique['bf_odds_to_use_over'] = model_data_3_with_preds_unique['actual_odds_over_back_3']
model_data_3_with_preds_unique.loc[model_data_3_with_preds_unique['bf_odds_to_use_over'].isnull(), 'bf_odds_to_use_over'] = model_data_3_with_preds_unique.loc[
    model_data_3_with_preds_unique['bf_odds_to_use_over'].isnull(), 'actual_odds_over_back_2']
model_data_3_with_preds_unique.loc[model_data_3_with_preds_unique['bf_odds_to_use_over'].isnull(), 'bf_odds_to_use_over'] = model_data_3_with_preds_unique.loc[
    model_data_3_with_preds_unique['bf_odds_to_use_over'].isnull(), 'actual_odds_over_back_1']

model_data_3_with_preds_unique['bf_odds_to_use_under'] = model_data_3_with_preds_unique['actual_odds_under_back_3']
model_data_3_with_preds_unique.loc[model_data_3_with_preds_unique['bf_odds_to_use_under'].isnull(), 'bf_odds_to_use_under'] = model_data_3_with_preds_unique.loc[
    model_data_3_with_preds_unique['bf_odds_to_use_under'].isnull(), 'actual_odds_under_back_2']
model_data_3_with_preds_unique.loc[model_data_3_with_preds_unique['bf_odds_to_use_under'].isnull(), 'bf_odds_to_use_under'] = model_data_3_with_preds_unique.loc[
    model_data_3_with_preds_unique['bf_odds_to_use_under'].isnull(), 'actual_odds_under_back_1']

In [191]:
model_data_4_with_preds_unique['bf_odds_to_use_over'] = model_data_4_with_preds_unique['actual_odds_over_back_3']
model_data_4_with_preds_unique.loc[model_data_4_with_preds_unique['bf_odds_to_use_over'].isnull(), 'bf_odds_to_use_over'] = model_data_4_with_preds_unique.loc[
    model_data_4_with_preds_unique['bf_odds_to_use_over'].isnull(), 'actual_odds_over_back_2']
model_data_4_with_preds_unique.loc[model_data_4_with_preds_unique['bf_odds_to_use_over'].isnull(), 'bf_odds_to_use_over'] = model_data_4_with_preds_unique.loc[
    model_data_4_with_preds_unique['bf_odds_to_use_over'].isnull(), 'actual_odds_over_back_1']

model_data_4_with_preds_unique['bf_odds_to_use_under'] = model_data_4_with_preds_unique['actual_odds_under_back_3']
model_data_4_with_preds_unique.loc[model_data_4_with_preds_unique['bf_odds_to_use_under'].isnull(), 'bf_odds_to_use_under'] = model_data_4_with_preds_unique.loc[
    model_data_4_with_preds_unique['bf_odds_to_use_under'].isnull(), 'actual_odds_under_back_2']
model_data_4_with_preds_unique.loc[model_data_4_with_preds_unique['bf_odds_to_use_under'].isnull(), 'bf_odds_to_use_under'] = model_data_4_with_preds_unique.loc[
    model_data_4_with_preds_unique['bf_odds_to_use_under'].isnull(), 'actual_odds_under_back_1']

In [192]:
model_data_1_with_preds_unique['winnings'] = 0

over_win_mask = (model_data_1_with_preds_unique['action']=='over') & (model_data_1_with_preds_unique['win']==1)
model_data_1_with_preds_unique.loc[over_win_mask, 'winnings'] = model_data_1_with_preds_unique.loc[over_win_mask, 'bf_odds_to_use_over']

under_win_mask = (model_data_1_with_preds_unique['action']=='under') & (model_data_1_with_preds_unique['win']==1)
model_data_1_with_preds_unique.loc[under_win_mask, 'winnings'] = model_data_1_with_preds_unique.loc[under_win_mask, 'bf_odds_to_use_under']

In [193]:
model_data_2_with_preds_unique['bet'] = 1
model_data_2_with_preds_unique.loc[model_data_2_with_preds_unique['action'] == 'None', 'bet'] = 0
model_data_2_with_preds_unique['win'] = 0
model_data_2_with_preds_unique.loc[(model_data_2_with_preds_unique['action']=='over') & (model_data_2_with_preds_unique['outcome']==1), 'win'] = 1
model_data_2_with_preds_unique.loc[(model_data_2_with_preds_unique['action']=='under') & (model_data_2_with_preds_unique['outcome']==0), 'win'] = 1

In [194]:
model_data_2_with_preds_unique['winnings'] = 0

over_win_mask = (model_data_2_with_preds_unique['action']=='over') & (model_data_2_with_preds_unique['win']==1)
model_data_2_with_preds_unique.loc[over_win_mask, 'winnings'] = model_data_2_with_preds_unique.loc[over_win_mask, 'bf_odds_to_use_over']

under_win_mask = (model_data_2_with_preds_unique['action']=='under') & (model_data_2_with_preds_unique['win']==1)
model_data_2_with_preds_unique.loc[under_win_mask, 'winnings'] = model_data_2_with_preds_unique.loc[under_win_mask, 'bf_odds_to_use_under']

In [195]:
model_data_3_with_preds_unique['bet'] = 1
model_data_3_with_preds_unique.loc[model_data_3_with_preds_unique['action'] == 'None', 'bet'] = 0
model_data_3_with_preds_unique['win'] = 0
model_data_3_with_preds_unique.loc[(model_data_3_with_preds_unique['action']=='over') & (model_data_3_with_preds_unique['outcome']==1), 'win'] = 1
model_data_3_with_preds_unique.loc[(model_data_3_with_preds_unique['action']=='under') & (model_data_3_with_preds_unique['outcome']==0), 'win'] = 1

In [196]:
model_data_3_with_preds_unique['winnings'] = 0

over_win_mask = (model_data_3_with_preds_unique['action']=='over') & (model_data_3_with_preds_unique['win']==1)
model_data_3_with_preds_unique.loc[over_win_mask, 'winnings'] = model_data_3_with_preds_unique.loc[over_win_mask, 'bf_odds_to_use_over']

under_win_mask = (model_data_3_with_preds_unique['action']=='under') & (model_data_3_with_preds_unique['win']==1)
model_data_3_with_preds_unique.loc[under_win_mask, 'winnings'] = model_data_3_with_preds_unique.loc[under_win_mask, 'bf_odds_to_use_under']

In [197]:
model_data_4_with_preds_unique['bet'] = 1
model_data_4_with_preds_unique.loc[model_data_4_with_preds_unique['action'] == 'None', 'bet'] = 0
model_data_4_with_preds_unique['win'] = 0
model_data_4_with_preds_unique.loc[(model_data_4_with_preds_unique['action']=='over') & (model_data_4_with_preds_unique['outcome']==1), 'win'] = 1
model_data_4_with_preds_unique.loc[(model_data_4_with_preds_unique['action']=='under') & (model_data_4_with_preds_unique['outcome']==0), 'win'] = 1

In [198]:
model_data_4_with_preds_unique['winnings'] = 0

over_win_mask = (model_data_4_with_preds_unique['action']=='over') & (model_data_4_with_preds_unique['win']==1)
model_data_4_with_preds_unique.loc[over_win_mask, 'winnings'] = model_data_4_with_preds_unique.loc[over_win_mask, 'bf_odds_to_use_over']

under_win_mask = (model_data_4_with_preds_unique['action']=='under') & (model_data_4_with_preds_unique['win']==1)
model_data_4_with_preds_unique.loc[under_win_mask, 'winnings'] = model_data_4_with_preds_unique.loc[under_win_mask, 'bf_odds_to_use_under']

In [199]:
max_bet = 100
model_data_1_with_preds_unique['max_bet_amount'] = 0

under_mask = model_data_1_with_preds_unique['action']=='under'
model_data_1_with_preds_unique.loc[under_mask, 'max_bet_amount'] = model_data_1_with_preds_unique.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_1_with_preds_unique['action']=='over'
model_data_1_with_preds_unique.loc[over_mask, 'max_bet_amount'] = model_data_1_with_preds_unique.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_1_with_preds_unique['max_winnings'] = model_data_1_with_preds_unique['max_bet_amount']*model_data_1_with_preds_unique['winnings']

model_data_1_with_preds_unique['profit'] = model_data_1_with_preds_unique['winnings'] - model_data_1_with_preds_unique['bet']
model_data_1_with_preds_unique['max_profit'] = model_data_1_with_preds_unique['max_winnings'] - model_data_1_with_preds_unique['max_bet_amount']

In [200]:
model_data_2_with_preds_unique['max_bet_amount'] = 0

under_mask = model_data_2_with_preds_unique['action']=='under'
model_data_2_with_preds_unique.loc[under_mask, 'max_bet_amount'] = model_data_2_with_preds_unique.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_2_with_preds_unique['action']=='over'
model_data_2_with_preds_unique.loc[over_mask, 'max_bet_amount'] = model_data_2_with_preds_unique.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_2_with_preds_unique['max_winnings'] = model_data_2_with_preds_unique['max_bet_amount']*model_data_2_with_preds_unique['winnings']

model_data_2_with_preds_unique['profit'] = model_data_2_with_preds_unique['winnings'] - model_data_2_with_preds_unique['bet']
model_data_2_with_preds_unique['max_profit'] = model_data_2_with_preds_unique['max_winnings'] - model_data_2_with_preds_unique['max_bet_amount']

In [201]:
model_data_3_with_preds_unique['max_bet_amount'] = 0

under_mask = model_data_3_with_preds_unique['action']=='under'
model_data_3_with_preds_unique.loc[under_mask, 'max_bet_amount'] = model_data_3_with_preds_unique.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_3_with_preds_unique['action']=='over'
model_data_3_with_preds_unique.loc[over_mask, 'max_bet_amount'] = model_data_3_with_preds_unique.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_3_with_preds_unique['max_winnings'] = model_data_3_with_preds_unique['max_bet_amount']*model_data_3_with_preds_unique['winnings']

model_data_3_with_preds_unique['profit'] = model_data_3_with_preds_unique['winnings'] - model_data_3_with_preds_unique['bet']
model_data_3_with_preds_unique['max_profit'] = model_data_3_with_preds_unique['max_winnings'] - model_data_3_with_preds_unique['max_bet_amount']

In [202]:
model_data_4_with_preds_unique['max_bet_amount'] = 0

under_mask = model_data_4_with_preds_unique['action']=='under'
model_data_4_with_preds_unique.loc[under_mask, 'max_bet_amount'] = model_data_4_with_preds_unique.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_4_with_preds_unique['action']=='over'
model_data_4_with_preds_unique.loc[over_mask, 'max_bet_amount'] = model_data_4_with_preds_unique.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_4_with_preds_unique['max_winnings'] = model_data_4_with_preds_unique['max_bet_amount']*model_data_4_with_preds_unique['winnings']

model_data_4_with_preds_unique['profit'] = model_data_4_with_preds_unique['winnings'] - model_data_4_with_preds_unique['bet']
model_data_4_with_preds_unique['max_profit'] = model_data_4_with_preds_unique['max_winnings'] - model_data_4_with_preds_unique['max_bet_amount']

In [203]:
model_data_1_with_preds_unique.groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,22,0,0,0.0,0.0,0.0,0.0,0.0
50,over,20,20,14,20.01,0.01,1863.13,1812.7075,-50.4225
50,under,12,12,4,17.0,5.0,838.8,1472.92,634.12
60,,21,0,0,0.0,0.0,0.0,0.0,0.0
60,over,20,20,11,17.85,-2.15,1731.98,1656.4962,-75.4838
60,under,13,13,4,13.6,0.6,1050.22,1038.835,-11.385
70,,16,0,0,0.0,0.0,0.0,0.0,0.0
70,over,15,15,8,17.44,2.44,1028.98,1186.0937,157.1137
70,under,23,23,10,23.86,0.86,1743.52,1795.9906,52.4706
75,,14,0,0,0.0,0.0,0.0,0.0,0.0


In [204]:
model_data_1_with_preds_unique.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,97,0,0,0.0,0.0,0.0,0.0,0.0
over,85,85,39,80.08,-4.92,6882.67,6249.6774,-632.9926
under,142,142,81,166.67,24.67,11835.17,13860.4982,2025.3282


In [205]:
model_data_2_with_preds_unique.groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,60,0,0,0.0,0.0,0.0,0.0,0.0
50,over,53,53,42,58.7,5.7,4199.27,4795.0203,595.7503
50,under,31,31,9,45.6,14.6,1746.38,2009.288,262.908
60,,58,0,0,0.0,0.0,0.0,0.0,0.0
60,over,47,47,35,55.23,8.23,3533.86,4324.8431,790.9831
60,under,39,39,16,55.94,16.94,2419.09,3396.7253,977.6353
70,,50,0,0,0.0,0.0,0.0,0.0,0.0
70,over,45,45,29,55.47,10.47,3154.42,3775.3085,620.8885
70,under,48,48,21,51.96,3.96,3113.07,3404.585,291.515
75,,48,0,0,0.0,0.0,0.0,0.0,0.0


In [206]:
model_data_2_with_preds_unique.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,328,0,0,0.0,0.0,0.0,0.0,0.0
over,250,250,149,288.12,38.12,16887.13,21032.5929,4145.4629
under,284,284,158,351.28,67.28,18759.17,23141.7965,4382.6265


In [207]:
model_data_3_with_preds_unique.groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,8,0,0,0.0,0.0,0.0,0.0,0.0
50,over,12,12,7,10.56,-1.44,1005.53,872.1186,-133.4114
50,under,7,7,4,18.65,11.65,388.86,1218.872,830.012
60,,8,0,0,0.0,0.0,0.0,0.0,0.0
60,over,11,11,6,9.44,-1.56,810.44,809.5685,-0.8715
60,under,6,6,4,12.46,6.46,411.27,790.8016,379.5316
70,,7,0,0,0.0,0.0,0.0,0.0,0.0
70,over,6,6,2,4.02,-1.98,356.98,163.3902,-193.5898
70,under,10,10,6,14.6,4.6,807.04,1131.2564,324.2164
75,,12,0,0,0.0,0.0,0.0,0.0,0.0


In [208]:
model_data_3_with_preds_unique.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,54,0,0,0.0,0.0,0.0,0.0,0.0
over,45,45,21,42.19,-2.81,3255.1,3279.8475,24.7475
under,49,49,37,87.26,38.26,3683.77,6449.5472,2765.7772


In [209]:
model_data_4_with_preds_unique.groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,207,0,0,0.0,0.0,0.0,0.0,0.0
50,over,200,200,145,203.68,3.68,16934.59,17312.7736,378.1836
50,under,96,96,17,77.95,-18.05,6203.0,4924.539,-1278.461
60,,193,0,0,0.0,0.0,0.0,0.0,0.0
60,over,154,154,94,161.74,7.74,12356.15,12425.4272,69.2772
60,under,139,139,44,151.22,12.22,8882.82,9746.8121,863.9921
70,,218,0,0,0.0,0.0,0.0,0.0,0.0
70,over,126,126,60,120.48,-5.52,10097.24,9799.5088,-297.7312
70,under,174,174,77,189.21,15.21,10904.39,13071.618,2167.228
75,,183,0,0,0.0,0.0,0.0,0.0,0.0


In [210]:
model_data_4_with_preds_unique.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,1150,0,0,0.0,0.0,0.0,0.0,0.0
over,770,770,400,769.8,-0.2,59419.3,58576.7037,-842.5963
under,1104,1104,571,1173.16,69.16,76844.05,83651.205,6807.155


In [211]:
from_date = '2023-01-28 00:00'
model_data_4_with_preds_unique[model_data_4_with_preds_unique['datetime_utc']>=from_date].groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
next_prediction_time,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50,,117,0,0,0.0,0.0,0.0,0.0,0.0
50,over,110,110,81,114.24,4.24,9166.63,9788.8951,622.2651
50,under,50,50,9,38.9,-11.1,3281.76,2239.059,-1042.701
60,,106,0,0,0.0,0.0,0.0,0.0,0.0
60,over,84,84,53,94.2,10.2,6486.99,6665.1205,178.1305
60,under,71,71,21,71.1,0.1,4211.11,4215.3238,4.2138
70,,123,0,0,0.0,0.0,0.0,0.0,0.0
70,over,74,74,40,80.57,6.57,5872.59,6661.5179,788.9279
70,under,90,90,36,87.52,-2.48,5073.94,5318.8826,244.9426
75,,105,0,0,0.0,0.0,0.0,0.0,0.0


In [212]:
model_data_4_with_preds_unique[model_data_4_with_preds_unique['datetime_utc']>=from_date].groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,659,0,0,0.0,0.0,0.0,0.0,0.0
over,415,415,230,442.38,27.38,31064.58,32783.2192,1718.6392
under,589,589,302,606.28,17.28,39261.97,41413.5028,2151.5328


In [178]:
min_date = '2023-01-28 00:00' # 1 is Jan 2 to Jan 5, 2 is Jan 6 to Jan 8, 3 is Jan 9 to Jan 13, 4 is Jan 14 on
max_date = '2023-02-28 00:00'

mask_1 = model_data_1_with_preds_unique['datetime_utc'].between(min_date, max_date)
mask_2 = model_data_2_with_preds_unique['datetime_utc'].between(min_date, max_date)
mask_3 = model_data_3_with_preds_unique['datetime_utc'].between(min_date, max_date)
mask_4 = model_data_4_with_preds_unique['datetime_utc'].between(min_date, max_date)

bets = sum(model_data_1_with_preds_unique.loc[mask_1, 'bet']) + sum(model_data_2_with_preds_unique.loc[mask_2, 'bet']) + sum(model_data_3_with_preds_unique.loc[mask_3, 'bet']) + sum(model_data_4_with_preds_unique.loc[mask_4, 'bet'])
wins = sum(model_data_1_with_preds_unique.loc[mask_1, 'win']) + sum(model_data_2_with_preds_unique.loc[mask_2, 'win']) + sum(model_data_3_with_preds_unique.loc[mask_3, 'win']) + sum(model_data_4_with_preds_unique.loc[mask_4, 'win'])
winnings = sum(model_data_1_with_preds_unique.loc[mask_1, 'winnings']) + sum(model_data_2_with_preds_unique.loc[mask_2, 'winnings']) + sum(model_data_3_with_preds_unique.loc[mask_3, 'winnings']) + sum(model_data_4_with_preds_unique.loc[mask_4, 'winnings'])
matches = (len(model_data_1_with_preds_unique.loc[mask_1, 'match_ref'].unique()) + len(model_data_2_with_preds_unique.loc[mask_2, 'match_ref'].unique()) +
           len(model_data_3_with_preds_unique.loc[mask_3, 'match_ref'].unique()) + len(model_data_4_with_preds_unique.loc[mask_4, 'match_ref'].unique()))

In [179]:
print(f'Total bets {bets}, total wins {wins}, total winnings {round(winnings,2)}, profit {round(100*(winnings-bets)/bets - 1,2)}%')

Total bets 1004, total wins 532, total winnings 1082.18, profit 6.79%


In [180]:
expected_wins = int(wins*bets/winnings)
expected_wins

493

In [181]:
expected_p = expected_wins/bets
expected_p

0.4910358565737052

In [182]:
actual_p = wins/bets
actual_p

0.5298804780876494

In [183]:
import scipy.stats as sps
sps.binom.cdf(wins, bets, expected_p)

0.9936792923911102

In [184]:
matches

348

In [185]:
sps.binom.cdf(np.ceil(wins*matches/bets), matches, expected_p)

0.9415254132085051

In [186]:
sps.binom.cdf(np.floor(wins*matches/bets), matches, expected_p)

0.9279144546656708

#### Compare comps

In [85]:
model_data_1_with_preds_unique[model_data_1_with_preds_unique['action']!='None'].groupby('competition_name').agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum
}).sort_values('match_ref', ascending=False)

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
competition_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Spanish Copa Del Rey,22,22,10,21.19,-0.81,1751.71,1510.7453,-240.9647
Greek Super League,15,15,9,21.74,6.74,1050.92,1680.103,629.183
Italian Serie A,13,13,4,8.17,-4.83,974.07,694.7663,-279.3037
English Premier League,11,11,5,11.48,0.48,1049.63,1148.0,98.37
Turkish Super Lig,9,9,7,26.09,17.09,566.67,1337.6238,770.9538
Welsh Premier League,7,7,4,8.29,1.29,324.28,445.0209,120.7409
Scottish Premiership,2,2,0,0.0,-2.0,158.24,0.0,-158.24


In [86]:
model_data_2_with_preds_unique[model_data_2_with_preds_unique['action']!='None'].groupby('competition_name').agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum
}).sort_values('match_ref', ascending=False)

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
competition_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Coupe de France,35,35,20,56.13,21.13,2272.51,3752.8021,1480.2921
English FA Cup,23,23,14,31.55,8.55,1767.88,2357.3908,589.5108
Portuguese Primeira Liga,23,23,19,58.36,35.36,1501.09,3991.3367,2490.2467
Spanish La Liga,19,19,15,36.42,17.42,1598.59,3118.0108,1519.4208
Italian Serie A,15,15,3,7.59,-7.41,1187.08,584.508,-602.572
Scottish Premiership,12,12,7,16.1,4.1,833.4,1188.934,355.534
English National North,11,11,3,5.21,-5.79,317.36,215.7992,-101.5608
Australian A-League Men,10,10,6,12.22,2.22,650.79,787.613,136.823
Greek Super League,8,8,4,9.98,1.98,493.05,624.2128,131.1628
Dutch Eredivisie,6,6,4,13.64,7.64,349.17,596.59,247.42


In [87]:
model_data_3_with_preds_unique[model_data_3_with_preds_unique['action']!='None'].groupby('competition_name').agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum
}).sort_values('match_ref', ascending=False)

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
competition_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
French Ligue 1,8,8,5,11.16,3.16,634.39,873.4948,239.1048
English League Cup,5,5,5,16.4,11.4,500.0,1640.0,1140.0
Welsh Premier League,5,5,0,0.0,-5.0,198.79,0.0,-198.79
English FA Cup,3,3,0,0.0,-3.0,248.77,0.0,-248.77
English National League,3,3,2,6.12,3.12,136.98,340.884,203.904
Australian A-League Men,2,2,1,1.41,-0.59,200.0,141.0,-59.0
Coppa Italia,2,2,1,1.55,-0.45,150.75,155.0,4.25
Greek Super League,2,2,2,3.61,1.61,131.51,225.3898,93.8798
Italian Serie A,2,2,1,2.48,0.48,200.0,248.0,48.0
English Football League Trophy,1,1,0,0.0,-1.0,30.92,0.0,-30.92


In [88]:
model_data_4_with_preds_unique[model_data_4_with_preds_unique['action']!='None'].groupby('competition_name').agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum
}).sort_values('match_ref', ascending=False)

Unnamed: 0_level_0,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit
competition_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Turkish Super Lig,51,51,21,44.11,-6.89,3811.9,3911.2194,99.3194
Spanish La Liga,49,49,25,58.55,9.55,3955.24,4178.91,223.67
French Ligue 1,44,44,22,56.17,12.17,3727.16,4839.4518,1112.2918
Australian A-League Men,34,34,17,41.71,7.71,1870.58,2246.6723,376.0923
Greek Super League,34,34,18,44.38,10.38,2389.69,2862.7181,473.0281
English National League,32,32,6,12.87,-19.13,1239.29,360.0461,-879.2439
Scottish Premiership,27,27,13,36.74,9.74,1878.23,2243.067,364.837
Spanish Copa Del Rey,20,20,7,20.68,0.68,1669.68,1869.184,199.504
Scottish FA Cup,20,20,7,21.47,1.47,689.72,1064.0603,374.3403
Portuguese Primeira Liga,20,20,9,18.11,-1.89,1494.57,1422.8245,-71.7455


#### Compare odds

In [89]:
odds_bands = [0, 1.5, 2, 3, 4, 5, 10, 999999]

model_data_1_with_preds_unique['odds_band'] = None
model_data_1_with_preds_unique['odds_used'] = None
for i, o in enumerate(odds_bands):
    over_mask = (model_data_1_with_preds_unique['action']=='over') & (model_data_1_with_preds_unique['actual_odds_over_back_3']>o)
    model_data_1_with_preds_unique.loc[over_mask, 'odds_band'] = i
    model_data_1_with_preds_unique.loc[over_mask, 'odds_used'] = model_data_1_with_preds_unique.loc[over_mask, 'actual_odds_over_back_3']
    
    under_mask = (model_data_1_with_preds_unique['action']=='under') & (model_data_1_with_preds_unique['actual_odds_under_back_3']>o)
    model_data_1_with_preds_unique.loc[under_mask, 'odds_band'] = i
    model_data_1_with_preds_unique.loc[under_mask, 'odds_used'] = model_data_1_with_preds_unique.loc[under_mask, 'actual_odds_under_back_3']

In [90]:
model_data_2_with_preds_unique['odds_band'] = None
model_data_2_with_preds_unique['odds_used'] = None
for i, o in enumerate(odds_bands):
    over_mask = (model_data_2_with_preds_unique['action']=='over') & (model_data_2_with_preds_unique['actual_odds_over_back_3']>o)
    model_data_2_with_preds_unique.loc[over_mask, 'odds_band'] = i
    model_data_2_with_preds_unique.loc[over_mask, 'odds_used'] = model_data_2_with_preds_unique.loc[over_mask, 'actual_odds_over_back_3']
    
    under_mask = (model_data_2_with_preds_unique['action']=='under') & (model_data_2_with_preds_unique['actual_odds_under_back_3']>o)
    model_data_2_with_preds_unique.loc[under_mask, 'odds_band'] = i
    model_data_2_with_preds_unique.loc[under_mask, 'odds_used'] = model_data_2_with_preds_unique.loc[under_mask, 'actual_odds_under_back_3']

In [91]:
model_data_3_with_preds_unique['odds_band'] = None
model_data_3_with_preds_unique['odds_used'] = None
for i, o in enumerate(odds_bands):
    over_mask = (model_data_3_with_preds_unique['action']=='over') & (model_data_3_with_preds_unique['actual_odds_over_back_3']>o)
    model_data_3_with_preds_unique.loc[over_mask, 'odds_band'] = i
    model_data_3_with_preds_unique.loc[over_mask, 'odds_used'] = model_data_3_with_preds_unique.loc[over_mask, 'actual_odds_over_back_3']
    
    under_mask = (model_data_3_with_preds_unique['action']=='under') & (model_data_3_with_preds_unique['actual_odds_under_back_3']>o)
    model_data_3_with_preds_unique.loc[under_mask, 'odds_band'] = i
    model_data_3_with_preds_unique.loc[under_mask, 'odds_used'] = model_data_3_with_preds_unique.loc[under_mask, 'actual_odds_under_back_3']

In [92]:
model_data_4_with_preds_unique['odds_band'] = None
model_data_4_with_preds_unique['odds_used'] = None
for i, o in enumerate(odds_bands):
    over_mask = (model_data_4_with_preds_unique['action']=='over') & (model_data_4_with_preds_unique['actual_odds_over_back_3']>o)
    model_data_4_with_preds_unique.loc[over_mask, 'odds_band'] = i
    model_data_4_with_preds_unique.loc[over_mask, 'odds_used'] = model_data_4_with_preds_unique.loc[over_mask, 'actual_odds_over_back_3']
    
    under_mask = (model_data_4_with_preds_unique['action']=='under') & (model_data_4_with_preds_unique['actual_odds_under_back_3']>o)
    model_data_4_with_preds_unique.loc[under_mask, 'odds_band'] = i
    model_data_4_with_preds_unique.loc[under_mask, 'odds_used'] = model_data_4_with_preds_unique.loc[under_mask, 'actual_odds_under_back_3']

In [93]:
odds_comp_1 = model_data_1_with_preds_unique[model_data_1_with_preds_unique['action']!='None'].groupby('odds_band').agg({
    'odds_used': max, 'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum
}).sort_values('odds_band')

odds_comp_1['roi_pc'] = 100*(odds_comp_1['profit'] / odds_comp_1['bet'])
odds_comp_1

Unnamed: 0_level_0,odds_used,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit,roi_pc
odds_band,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,1.47,2,2,2,2.72,0.72,116.03,163.2579,47.2279,36.0
1,2.0,24,24,15,26.05,2.05,2085.44,2317.451,232.011,8.541667
2,3.0,31,31,16,37.64,6.64,2340.76,2669.0984,328.3384,21.419355
3,4.0,9,9,4,15.45,6.45,572.01,1040.072,468.062,71.666667
4,5.0,6,6,0,0.0,-6.0,440.65,0.0,-440.65,-100.0
5,10.0,7,7,2,15.1,8.1,320.63,626.38,305.75,115.714286


In [94]:
odds_comp_2 = model_data_2_with_preds_unique[model_data_2_with_preds_unique['action']!='None'].groupby('odds_band').agg({
    'odds_used': max, 'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum
}).sort_values('odds_band')

odds_comp_2['roi_pc'] = 100*(odds_comp_2['profit'] / odds_comp_2['bet'])
odds_comp_2

Unnamed: 0_level_0,odds_used,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit,roi_pc
odds_band,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,1.5,6,6,4,5.79,-0.21,427.5,392.392,-35.108,-3.5
1,2.0,46,46,34,61.01,15.01,3216.31,4432.6309,1216.3209,32.630435
2,3.0,74,74,40,98.58,24.58,5120.87,7289.895,2169.025,33.216216
3,4.0,24,24,10,35.7,11.7,1378.71,2044.5345,665.8245,48.75
4,5.0,20,20,6,25.4,5.4,1243.61,1660.245,416.635,27.0
5,8.0,12,12,5,29.6,17.6,687.83,2100.7,1412.87,146.666667
6,11.5,1,1,0,0.0,-1.0,75.9,0.0,-75.9,-100.0


In [95]:
odds_comp_3 = model_data_3_with_preds_unique[model_data_3_with_preds_unique['action']!='None'].groupby('odds_band').agg({
    'odds_used': max, 'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum
}).sort_values('odds_band')

odds_comp_3['roi_pc'] = 100*(odds_comp_3['profit'] / odds_comp_3['bet'])
odds_comp_3

Unnamed: 0_level_0,odds_used,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit,roi_pc
odds_band,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,1.46,2,2,1,1.41,-0.59,200.0,141.0,-59.0,-29.5
1,1.98,8,8,7,12.51,4.51,645.94,1115.3898,469.4498,56.375
2,2.88,13,13,7,17.58,4.58,997.85,1515.4948,517.6448,35.230769
3,3.9,6,6,2,6.8,0.8,345.62,408.884,63.264,13.333333
4,4.3,2,2,1,4.3,2.3,200.0,430.0,230.0,115.0
5,6.4,4,4,1,6.4,2.4,242.7,640.0,397.3,60.0


In [96]:
odds_comp_4 = model_data_4_with_preds_unique[model_data_4_with_preds_unique['action']!='None'].groupby('odds_band').agg({
    'odds_used': max, 'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum
}).sort_values('odds_band')

odds_comp_4['roi_pc'] = 100*(odds_comp_4['profit'] / odds_comp_4['bet'])
odds_comp_4

Unnamed: 0_level_0,odds_used,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit,roi_pc
odds_band,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,1.5,14,14,7,10.31,-3.69,1134.97,827.0609,-307.9091,-26.357143
1,2.0,138,138,88,151.9,13.9,10109.11,11208.9361,1099.8261,10.072464
2,3.0,245,245,103,243.9,-1.1,16290.44,17054.6722,764.2322,-0.44898
3,4.0,94,94,30,102.95,8.95,6542.13,6847.64,305.51,9.521277
4,5.0,45,45,13,56.7,11.7,3135.99,4207.707,1071.717,26.0
5,10.0,48,48,7,41.7,-6.3,2953.15,2533.347,-419.803,-13.125
6,13.5,1,1,1,13.5,12.5,26.12,352.62,326.5,1250.0


In [97]:
from_date = '2023-01-01'
odds_comp_4 = model_data_4_with_preds_unique[
    (model_data_4_with_preds_unique['action']!='None') &
    (model_data_4_with_preds_unique['datetime_utc']>=from_date)].groupby('odds_band').agg({
    'odds_used': max, 'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum
}).sort_values('odds_band')

odds_comp_4['roi_pc'] = 100*(odds_comp_4['profit'] / odds_comp_4['bet'])
odds_comp_4

Unnamed: 0_level_0,odds_used,match_ref,bet,win,winnings,profit,max_bet_amount,max_winnings,max_profit,roi_pc
odds_band,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,1.5,14,14,7,10.31,-3.69,1134.97,827.0609,-307.9091,-26.357143
1,2.0,138,138,88,151.9,13.9,10109.11,11208.9361,1099.8261,10.072464
2,3.0,245,245,103,243.9,-1.1,16290.44,17054.6722,764.2322,-0.44898
3,4.0,94,94,30,102.95,8.95,6542.13,6847.64,305.51,9.521277
4,5.0,45,45,13,56.7,11.7,3135.99,4207.707,1071.717,26.0
5,10.0,48,48,7,41.7,-6.3,2953.15,2533.347,-419.803,-13.125
6,13.5,1,1,1,13.5,12.5,26.12,352.62,326.5,1250.0


In [98]:
break

SyntaxError: 'break' outside loop (<ipython-input-98-6aaf1f276005>, line 4)

# Data issues to look into
* Duplicate matches - THIS IS BECAUSE IT RERUNS FOR ANOTHER MATCH WITHIN 1 MINUTE AND THE SAME MATCH IS STILL WITHIN THE ALLOWABLE TIME RANGE
* ODDS CHANGES A LOT IN ONE MINUTE LATE ON! - Seems like this is ok based on testing, can also focus on earlier bets if a worry
* SL and BF string matching issues
* Delay (and think about how this will affect things in practice
* Goals scored differences between data at odds time and the retrospective data collection
* Oddly large looking odds

#### Delay times

In [None]:
model_data_2_with_preds_unique['delay_time'].describe()

#### Weirdly large looking odds

In [None]:
min_date = '2023-01-11' # 1 is Jan 2 to Jan 5, 2 is Jan 6 to Jan 8, 3 is Jan 9 on
max_date = '2023-01-12'
prediction_time = 80
over_under = 'over'

mask = (
    model_data_3_with_preds_unique['match_date'].between(min_date, max_date) &
    (model_data_3_with_preds_unique['next_prediction_time']==prediction_time) &
    (model_data_3_with_preds_unique['action']==over_under)
)

model_data_3_with_preds_unique.loc[
    mask,
    ['match_date', 'team_a_name', 'team_b_name', 'betfair_name', 'team_a_score', 'team_b_score', 'runner_name_over',
     f'goals_pre_{prediction_time}m', f'goals_post_{prediction_time}m', 'outcome', f'goals_pre_{prediction_time}m_added_on_data', f'goals_post_{prediction_time}m_added_on_data',
     'clock', 'lm_odds_over', 'rf_odds_over', 'lm_odds_under', 'rf_odds_under', 'actual_odds_over_back_1', 'actual_odds_under_back_1', 'delay_time',
     'team_a_score_added_on_data', 'team_b_score_added_on_data',
     'action', 'bet', 'win', 'winnings', 'max_bet_amount', 'max_winnings', 'profit','max_profit']]

#### Check test vs actual results

In [None]:
model_data_4_with_preds['action'] = 'None'

model_data_4_with_preds.loc[
    (model_data_4_with_preds['lm_odds_over']*(1+odds_gap_min)<model_data_4_with_preds['actual_odds_over_back_1']) &
    (model_data_4_with_preds['rf_odds_over']*(1+odds_gap_min)<model_data_4_with_preds['actual_odds_over_back_1']) &
    (model_data_4_with_preds['actual_odds_over_lay_1']/model_data_4_with_preds['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_4_with_preds.loc[
    (model_data_4_with_preds['lm_odds_under']*(1+odds_gap_min)<model_data_4_with_preds['actual_odds_under_back_1']) &
    (model_data_4_with_preds['rf_odds_under']*(1+odds_gap_min)<model_data_4_with_preds['actual_odds_under_back_1']) &
    (model_data_4_with_preds['actual_odds_under_lay_1']/model_data_4_with_preds['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [None]:
# need to add outcomes
model_data_4_with_preds['outcome'] = None
for p in prediction_times:
    mask = model_data_4_with_preds['next_prediction_time']==p
    model_data_4_with_preds.loc[mask, 'outcome_orig'] = (model_data_4_with_preds.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1

model_data_4_with_preds['outcome'] = (model_data_4_with_preds['total_goals_added_on_data'] > model_data_4_with_preds['total_goals'])*1

In [None]:
model_data_4_with_preds['bet'] = 1
model_data_4_with_preds.loc[model_data_4_with_preds['action'] == 'None', 'bet'] = 0
model_data_4_with_preds['win'] = 0
model_data_4_with_preds.loc[(model_data_4_with_preds['action']=='over') & (model_data_4_with_preds['outcome']==1), 'win'] = 1
model_data_4_with_preds.loc[(model_data_4_with_preds['action']=='under') & (model_data_4_with_preds['outcome']==0), 'win'] = 1

In [None]:
model_data_4_with_preds['winnings'] = 0

over_win_mask = (model_data_4_with_preds['action']=='over') & (model_data_4_with_preds['win']==1)
model_data_4_with_preds.loc[over_win_mask, 'winnings'] = model_data_4_with_preds.loc[over_win_mask, 'actual_odds_over_back_3']

under_win_mask = (model_data_4_with_preds['action']=='under') & (model_data_4_with_preds['win']==1)
model_data_4_with_preds.loc[under_win_mask, 'winnings'] = model_data_4_with_preds.loc[under_win_mask, 'actual_odds_under_back_3']

In [None]:
model_data_4_with_preds['max_bet_amount'] = 0

under_mask = model_data_4_with_preds['action']=='under'
model_data_4_with_preds.loc[under_mask, 'max_bet_amount'] = model_data_4_with_preds.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_4_with_preds['action']=='over'
model_data_4_with_preds.loc[over_mask, 'max_bet_amount'] = model_data_4_with_preds.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_4_with_preds['max_winnings'] = model_data_4_with_preds['max_bet_amount']*model_data_4_with_preds['winnings']

model_data_4_with_preds['profit'] = model_data_4_with_preds['winnings'] - model_data_4_with_preds['bet']
model_data_4_with_preds['max_profit'] = model_data_4_with_preds['max_winnings'] - model_data_4_with_preds['max_bet_amount']

In [None]:
from_date = '2023-01-16 00:00'
model_data_4_with_preds[model_data_4_with_preds['datetime_utc']>=from_date].groupby(['next_prediction_time', 'action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

In [None]:
model_data_4_with_preds[model_data_4_with_preds['datetime_utc']>=from_date].groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

In [None]:
# get bet outcomes
connect_string = 'mysql+pymysql://root:'+dbpw+'@localhost/sl_bf_late_goals'
sql_engine = sqlalchemy.create_engine(connect_string)
order_results = pd.read_sql('''SELECT * FROM testing_live_order_results''', con=sql_engine)

In [None]:
order_results.head(3)

In [None]:
recent_bets = model_data_4_with_preds[(model_data_4_with_preds['datetime_utc']>=from_date) & (model_data_4_with_preds['action']!='None')]
len(recent_bets)

In [None]:
recent_bets_with_outcomes = recent_bets.merge(
    order_results[['status', 'market_id', 'datetime_utc', 'price', 'average_price_matched', 'order_status', 'bet_id']], how='left', on=['market_id', 'datetime_utc'])

In [None]:
recent_bets_with_outcomes.shape

In [None]:
recent_bets_with_outcomes.head(3)

In [None]:
recent_bets_with_outcomes[['match_date', 'datetime_utc', 'betfair_id', 'team_a_name', 'team_b_name', 'betfair_name',
                           'action', 'bet', 'win', 'winnings', 'profit', 'actual_odds_over_back_1', 'actual_odds_under_back_1',
                           'status', 'price', 'average_price_matched', 'order_status',
                           'max_bet_amount', 'max_winnings', 'max_profit', 'clock',
                           'lm_odds_over', 'rf_odds_over', 'lm_odds_under', 'rf_odds_under',]].head(3)

In [None]:
recent_bets_with_outcomes['actual_winnings'] = recent_bets_with_outcomes['win']*recent_bets_with_outcomes['average_price_matched'].astype(float)
recent_bets_with_outcomes['actual_winnings'] = recent_bets_with_outcomes['actual_winnings'].fillna(0)
recent_bets_with_outcomes['actual_profit'] = recent_bets_with_outcomes['actual_winnings'] - 1#recent_bets_with_outcomes['bet']

In [None]:
recent_bets_with_outcomes.groupby(['status', 'order_status']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'actual_winnings': sum, 'actual_profit': sum})

Get actual PL etc from betfair

In [None]:
header = {'X-Application': application, 'Content-Type': 'application/x-www-form-urlencoded'}
auth = 'username='+username+'&password='+password

try:
    login = requests.post('https://identitysso-cert.betfair.com/api/certlogin',
                      cert=('/etc/ssl/client-2048.crt', '/etc/ssl/client-2048.key'),
                      headers=header, data=auth, timeout=5)
except Exception as e:
    print('Request error: '+str(e))
login_success = login.json()['loginStatus']
logging.info('Login '+str(login_success))

In [None]:
ssoid = login.json()['sessionToken']
headers = {'X-Application': application, 'X-Authentication': ssoid, 'content-type': 'application/json'}

In [None]:
bet_url = "https://api.betfair.com/exchange/betting/json-rpc/v1"

In [None]:
markets = list(recent_bets_with_outcomes['market_id'].unique())
bet_outcomes_bf = []
for m in markets:
    
    # get settled bets
    outcome_req = '{"jsonrpc": "2.0", "method": "SportsAPING/v1.0/listClearedOrders",\
                    "params": {"marketIds": ["' + str(m) + '"], "betStatus": "SETTLED"}, "id": 1}'
    request = requests.post(bet_url, data=outcome_req.encode('utf-8'), headers=headers)
    outcome_result = request.json()
    bet_outcomes_bf += outcome_result['result']['clearedOrders']

In [None]:
def parse_bet_outcome(bet_outcome):
    return [
        bet_outcome.get('eventTypeId', None),
        bet_outcome.get('eventId', None),
        bet_outcome.get('marketId', None),
        bet_outcome.get('selectionId', None),
        bet_outcome.get('handicap', None),
        bet_outcome.get('betId', None),
        bet_outcome.get('placedDate', None),
        bet_outcome.get('persistenceType', None),
        bet_outcome.get('orderType', None),
        bet_outcome.get('side', None),
        bet_outcome.get('betOutcome', None),
        bet_outcome.get('priceRequested', None),
        bet_outcome.get('settledDate', None),
        bet_outcome.get('lastMatchedDate', None),
        bet_outcome.get('betCount', None),
        bet_outcome.get('priceMatched', None),
        bet_outcome.get('priceReduced', None),
        bet_outcome.get('sizeSettled', None),
        bet_outcome.get('sizeCancelled', None),
        bet_outcome.get('profit', None),
    ]

outcome_cols = ['event_type_id', 'event_id', 'market_id', 'selection_id', 'handicap', 'bet_id', 'placed_date', 'persistence_type',
                'order_type', 'side', 'bet_outcome', 'price_requested', 'settled_date', 'last_matched_date', 'bet_count', 'price_matched',
                'price_reduced', 'size_settled', 'size_cancelled', 'profit']

In [None]:
bet_outcomes_bf_df = pd.DataFrame([parse_bet_outcome(b) for b in bet_outcomes_bf], columns=outcome_cols)

In [None]:
bet_outcomes_bf_df['bet_win_bf'] = (bet_outcomes_bf_df['bet_outcome']=='WON')*1

In [None]:
bet_outcomes_bf_df.shape

In [None]:
recent_bets_with_outcomes_and_bf = recent_bets_with_outcomes.merge(
    bet_outcomes_bf_df[['bet_id', 'bet_outcome', 'price_requested', 'price_matched', 'profit', 'bet_win_bf']], how='left', on='bet_id', suffixes=('', '_bf'))

In [None]:
recent_bets_with_outcomes_and_bf.shape

In [None]:
recent_bets_with_outcomes_and_bf.groupby(['status', 'order_status']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'actual_winnings': sum, 'actual_profit': sum,
    'bet_win_bf': sum, 'profit_bf': sum})

In [None]:
recent_bets_with_outcomes_and_bf['profit_diff'] = recent_bets_with_outcomes_and_bf['profit'] - recent_bets_with_outcomes_and_bf['profit_bf']

In [None]:
cols_to_show = [c for c in recent_bets_with_outcomes_and_bf.columns if 'goal' not in c]

In [None]:
pd.options.display.max_columns = 150
recent_bets_with_outcomes_and_bf[cols_to_show].sort_values('profit_diff').tail(100)

* 85 in betfair settled (since 2023-01-14 17:00) vs 110 bets above
* 18 in betfair cancelled
* 0 lapsed
* 7 order errors recorded in testing_live_order_results in db

In [None]:
market_id in ['1.208431813', '1.208431854', '1.208431858']

In [None]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 150
model_data_4_with_preds.loc[
    (model_data_4_with_preds['datetime_utc']>=from_date) & (model_data_4_with_preds['action']!='None')# & (model_data_4_with_preds['market_id'].isin(['1.208431813', '1.208431854', '1.208431858']))
    ,
    ['match_date', 'datetime_utc', 'betfair_id', 'team_a_name', 'team_b_name', 'betfair_name',
     'action', 'bet', 'win', 'winnings', 'profit', 'actual_odds_over_back_1', 'actual_odds_under_back_1', 'max_bet_amount', 'max_winnings','max_profit',
     'team_a_score', 'team_b_score', 'runner_name_over',
     #f'goals_pre_{prediction_time}m', f'goals_post_{prediction_time}m', 'outcome', f'goals_pre_{prediction_time}m_added_on_data', f'goals_post_{prediction_time}m_added_on_data',
     'clock', 'lm_odds_over', 'rf_odds_over', 'lm_odds_under', 'rf_odds_under', 'delay_time',
     'team_a_score_added_on_data', 'team_b_score_added_on_data',  'datetime_utc'
     ]].sort_values(['team_a_name', 'action', 'actual_odds_over_back_1'])

In [None]:
market_datetime_counts = model_data_4_with_preds[model_data_4_with_preds['datetime_utc']>=from_date].groupby(['market_id', 'datetime_utc']).size().reset_index().rename(columns={0:'count'})

In [None]:
market_datetime_counts.groupby('count').size()

In [None]:
market_datetime_counts3 = model_data_3_with_preds.groupby(['market_id', 'datetime_utc']).size().reset_index().rename(columns={0:'count'})

In [None]:
market_datetime_counts3.groupby('count').size()

In [None]:
model_data_4_with_preds.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

In [None]:
model_data_4_with_preds[
    model_data_4_with_preds['market_id'].isin(market_datetime_counts.loc[market_datetime_counts['count']>1, 'market_id'])].groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

In [None]:
model_data_4_with_preds[
    ~model_data_4_with_preds['market_id'].isin(market_datetime_counts.loc[market_datetime_counts['count']>1, 'market_id'])].groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

In [None]:
model_data_3_with_preds['action'] = 'None'

model_data_3_with_preds.loc[
    (model_data_3_with_preds['lm_odds_over']*(1+odds_gap_min)<model_data_3_with_preds['actual_odds_over_back_1']) &
    (model_data_3_with_preds['rf_odds_over']*(1+odds_gap_min)<model_data_3_with_preds['actual_odds_over_back_1']) &
    (model_data_3_with_preds['actual_odds_over_lay_1']/model_data_3_with_preds['actual_odds_over_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'over'

model_data_3_with_preds.loc[
    (model_data_3_with_preds['lm_odds_under']*(1+odds_gap_min)<model_data_3_with_preds['actual_odds_under_back_1']) &
    (model_data_3_with_preds['rf_odds_under']*(1+odds_gap_min)<model_data_3_with_preds['actual_odds_under_back_1']) &
    (model_data_3_with_preds['actual_odds_under_lay_1']/model_data_3_with_preds['actual_odds_under_back_1'] < (1 + back_lay_max_pc)), 'action'] = 'under'

In [None]:
# need to add outcomes
model_data_3_with_preds['outcome'] = None
for p in prediction_times:
    mask = model_data_3_with_preds['next_prediction_time']==p
    model_data_3_with_preds.loc[mask, 'outcome_orig'] = (model_data_3_with_preds.loc[mask, f'goals_post_{p}m_added_on_data']>0)*1

model_data_3_with_preds['outcome'] = (model_data_3_with_preds['total_goals_added_on_data'] > model_data_3_with_preds['total_goals'])*1

In [None]:
model_data_3_with_preds['bet'] = 1
model_data_3_with_preds.loc[model_data_3_with_preds['action'] == 'None', 'bet'] = 0
model_data_3_with_preds['win'] = 0
model_data_3_with_preds.loc[(model_data_3_with_preds['action']=='over') & (model_data_3_with_preds['outcome']==1), 'win'] = 1
model_data_3_with_preds.loc[(model_data_3_with_preds['action']=='under') & (model_data_3_with_preds['outcome']==0), 'win'] = 1

In [None]:
model_data_3_with_preds['winnings'] = 0

over_win_mask = (model_data_3_with_preds['action']=='over') & (model_data_3_with_preds['win']==1)
model_data_3_with_preds.loc[over_win_mask, 'winnings'] = model_data_3_with_preds.loc[over_win_mask, 'actual_odds_over_back_3']

under_win_mask = (model_data_3_with_preds['action']=='under') & (model_data_3_with_preds['win']==1)
model_data_3_with_preds.loc[under_win_mask, 'winnings'] = model_data_3_with_preds.loc[under_win_mask, 'actual_odds_under_back_3']

In [None]:
model_data_3_with_preds['max_bet_amount'] = 0

under_mask = model_data_3_with_preds['action']=='under'
model_data_3_with_preds.loc[under_mask, 'max_bet_amount'] = model_data_3_with_preds.loc[under_mask, 'size_under_back_1'].apply(lambda x: min(x, max_bet))

over_mask = model_data_3_with_preds['action']=='over'
model_data_3_with_preds.loc[over_mask, 'max_bet_amount'] = model_data_3_with_preds.loc[over_mask, 'size_over_back_1'].apply(lambda x: min(x, max_bet))

model_data_3_with_preds['max_winnings'] = model_data_3_with_preds['max_bet_amount']*model_data_3_with_preds['winnings']

model_data_3_with_preds['profit'] = model_data_3_with_preds['winnings'] - model_data_3_with_preds['bet']
model_data_3_with_preds['max_profit'] = model_data_3_with_preds['max_winnings'] - model_data_3_with_preds['max_bet_amount']

In [None]:
model_data_3_with_preds.groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

In [None]:
model_data_3_with_preds[
    model_data_3_with_preds['market_id'].isin(market_datetime_counts3.loc[market_datetime_counts3['count']>1, 'market_id'])].groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})

In [None]:
model_data_3_with_preds[
    ~model_data_3_with_preds['market_id'].isin(market_datetime_counts3.loc[market_datetime_counts3['count']>1, 'market_id'])].groupby(['action']).agg({
    'match_ref': len, 'bet': sum, 'win': sum, 'winnings': sum, 'profit': sum, 'max_bet_amount': sum, 'max_winnings': sum, 'max_profit': sum})