# Betfair getting odds and placing bets

#### Useful links

* https://docs.developer.betfair.com/display/1smk3cen4v3lu3yomq5qye0ni/Betting+Enums#BettingEnums-MarketProjection — Betfair betting enums (e.g. `MarketProjection`) used as API method parameters


In [1]:
import requests
import urllib
import json
import pandas as pd
import numpy as np
import pickle
from tqdm import tqdm_notebook
import datetime
import importlib
import config
importlib.reload(config)
from config import username, password, application, dbpw

import matplotlib.pyplot as plt
import seaborn as sns

import pymysql
import sqlalchemy

import xgboost as xgb

In [2]:
# Stretch the notebook container to the full browser width so wide
# DataFrames are not clipped. `IPython.display` is the public import
# location; importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
# Turn off pandas' chained-assignment warning; later cells add columns to
# filtered DataFrames (e.g. the per-horizon prediction subsets).
pd.options.mode.chained_assignment = None

In [4]:
# Show up to 100 columns and 100 rows when a DataFrame is displayed.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

## Load models

In [5]:
# Unpickle the five place-market models; the number in each name is the
# `minutes_to_event_rounded` value the model is applied to further down.
_places_model_paths = {
    60: '/home/angus/projects/betting/tote/models/betfair_places_model_60.pkl',
    120: '/home/angus/projects/betting/tote/models/betfair_places_model_120.pkl',
    180: '/home/angus/projects/betting/tote/models/betfair_places_model_180.pkl',
    240: '/home/angus/projects/betting/tote/models/betfair_places_model_240.pkl',
    300: '/home/angus/projects/betting/tote/models/betfair_places_model_300.pkl',
}

_places_models = {}
for _horizon, _model_path in _places_model_paths.items():
    with open(_model_path, 'rb') as f:
        _places_models[_horizon] = pickle.load(f)

# Keep the individual names that the prediction cell refers to.
betfair_places_model_60 = _places_models[60]
betfair_places_model_120 = _places_models[120]
betfair_places_model_180 = _places_models[180]
betfair_places_model_240 = _places_models[240]
betfair_places_model_300 = _places_models[300]

## Sign in

In [6]:
# Certificate-based login against the Betfair identity endpoint.
header = {'X-Application': application, 'Content-Type': 'application/x-www-form-urlencoded'}
# Hand the credentials over as a mapping so `requests` form-encodes them.
# Concatenating them into 'username=...&password=...' by hand corrupts the
# body whenever a value contains characters such as '&', '+', '=' or '%'.
auth = {'username': username, 'password': password}

login = requests.post('https://identitysso-cert.betfair.com/api/certlogin',
                      cert=('/etc/ssl/client-2048.crt', '/etc/ssl/client-2048.key'),
                      headers=header, data=auth)
# Fail here on an HTTP-level error instead of later when the body is parsed.
login.raise_for_status()

In [7]:
# Inspect the login response.
# NOTE(review): this echoes the full payload, including `sessionToken`, into
# the saved notebook output.
login.json()

{'sessionToken': 'GEkb7ulQjhyVP+n7AbW+RUbwO2MGE7YXQk6z2IHuw5I=',
 'loginStatus': 'SUCCESS'}

In [8]:
# Pull the session token out of the login response. Check `loginStatus`
# first so a rejected login produces a readable error rather than a bare
# KeyError on 'sessionToken'.
login_payload = login.json()
if login_payload.get('loginStatus') != 'SUCCESS':
    raise RuntimeError('Betfair login failed: ' + str(login_payload.get('loginStatus')))
ssoid = login_payload['sessionToken']

## Get event types

In [9]:
# Endpoint and headers shared by every JSON-RPC betting call below.
bet_url = "https://api.betfair.com/exchange/betting/json-rpc/v1"
headers = {
    'X-Application': application,
    'X-Authentication': ssoid,
    'content-type': 'application/json',
}

# List every event type (sport) with an unfiltered request.
event_req = (
    '{"jsonrpc": "2.0", "method": "SportsAPING/v1.0/listEventTypes", '
    '"params": {"filter":{ }}, "id": 1}'
)
request = requests.post(url=bet_url, data=event_req.encode('utf-8'), headers=headers)
request.json()

{'jsonrpc': '2.0',
 'result': [{'eventType': {'id': '1', 'name': 'Soccer'}, 'marketCount': 11310},
  {'eventType': {'id': '2', 'name': 'Tennis'}, 'marketCount': 791},
  {'eventType': {'id': '3', 'name': 'Golf'}, 'marketCount': 48},
  {'eventType': {'id': '4', 'name': 'Cricket'}, 'marketCount': 78},
  {'eventType': {'id': '5', 'name': 'Rugby Union'}, 'marketCount': 106},
  {'eventType': {'id': '1477', 'name': 'Rugby League'}, 'marketCount': 86},
  {'eventType': {'id': '6', 'name': 'Boxing'}, 'marketCount': 32},
  {'eventType': {'id': '7', 'name': 'Horse Racing'}, 'marketCount': 752},
  {'eventType': {'id': '8', 'name': 'Motor Sport'}, 'marketCount': 16},
  {'eventType': {'id': '27454571', 'name': 'Esports'}, 'marketCount': 156},
  {'eventType': {'id': '10', 'name': 'Special Bets'}, 'marketCount': 29},
  {'eventType': {'id': '11', 'name': 'Cycling'}, 'marketCount': 13},
  {'eventType': {'id': '2152880', 'name': 'Gaelic Games'}, 'marketCount': 29},
  {'eventType': {'id': '3988', 'name': '

## Get specific HR markets (event id 7)

In [10]:
# Fetch the catalogue of GB/IE horse-racing WIN and PLACE markets starting
# between one hour ago and 24 hours from now.

# `import urllib` alone does not guarantee the `request` submodule is loaded.
import urllib.request

event_id = ["7"]
countries = ["GB", "IE"]
market_types = ["WIN", "PLACE"]
# The timestamps are sent with a 'Z' (UTC) suffix, so the window has to be
# computed from the UTC clock; local time would shift it on any host that is
# not running on UTC.
now_utc = datetime.datetime.now(datetime.timezone.utc)
market_start_time = (now_utc + datetime.timedelta(hours=-1)).strftime('%Y-%m-%dT%H:%M:%SZ')
market_end_time = (now_utc + datetime.timedelta(hours=24)).strftime('%Y-%m-%dT%H:%M:%SZ')
max_results = 1000
sort_type = 'FIRST_TO_START'
# "RUNNER_METADATA" is currently left out of the projection.
metadata = ["EVENT_TYPE", "COMPETITION", "EVENT", "MARKET_START_TIME", "MARKET_DESCRIPTION", "RUNNER_DESCRIPTION"]
inplay = False

# Serialise with json.dumps rather than string concatenation so quoting and
# escaping are always valid JSON.
user_req = json.dumps({
    "jsonrpc": "2.0",
    "method": "SportsAPING/v1.0/listMarketCatalogue",
    "params": {
        "filter": {
            "eventTypeIds": event_id,
            "marketTypeCodes": market_types,
            "inPlayOnly": inplay,
            "marketCountries": countries,
            "marketStartTime": {"from": market_start_time, "to": market_end_time},
        },
        "sort": sort_type,
        "maxResults": max_results,
        "marketProjection": metadata,
    },
    "id": 1,
})

req = urllib.request.Request(bet_url, data=user_req.encode('utf-8'), headers=headers)
# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(req) as response:
    json_response = response.read()
pkg = json_response.decode('utf-8')
result = json.loads(pkg)
# Report an API-level error explicitly instead of a KeyError on 'result'.
if 'error' in result:
    raise RuntimeError('listMarketCatalogue failed: ' + str(result['error']))
market_catalogue = result['result']


In [11]:
# Naive UTC timestamp; it is stamped onto the parsed market and runner frames
# as `api_call_time_utc`.
market_info_time_utc = datetime.datetime.utcnow()

In [12]:
# Display the captured timestamp.
market_info_time_utc

datetime.datetime(2021, 5, 6, 14, 52, 12, 446500)

## Parse market details

In [13]:
def parse_places(description):
    """Return how many finishing places pay out for a market description.

    A description whose ``marketType`` is ``'WIN'`` gives 1. Otherwise the
    ``rules`` text is searched for the "Who will finish ..." wording for
    2, 3 or 4 places. When nothing matches, the string ``'Unknown'`` is
    returned.
    """
    if description.get('marketType', '') == 'WIN':
        return 1

    rules_text = description.get('rules', '')
    # Checked in this order; the first phrase found decides the result.
    phrase_to_places = (
        ('Who will finish 1st or 2nd', 2),
        ('Who will finish 1st, 2nd or 3rd', 3),
        ('Who will finish 1st, 2nd, 3rd or 4th', 4),
    )
    for phrase, places in phrase_to_places:
        if phrase in rules_text:
            return places
    return 'Unknown'

def parse_market_details(market_cat_entry):
    """Flatten one market-catalogue entry into a row ordered like ``md_cols``.

    Values come from the entry itself and from its nested ``description``,
    ``event`` and ``eventType`` dicts; a missing key yields ``None``. The
    tenth value is derived with ``parse_places`` and the regulator is always
    passed through ``str``.
    """
    desc = market_cat_entry.get('description', {})
    evt = market_cat_entry.get('event', {})
    evt_type = market_cat_entry.get('eventType', {})

    row = [market_cat_entry.get('marketId'), market_cat_entry.get('marketStartTime')]
    row += [desc.get(key) for key in (
        'bspMarket', 'turnInPlayEnabled', 'persistenceEnabled', 'marketBaseRate')]
    row += [evt.get('id'), evt_type.get('id'), desc.get('raceType'), parse_places(desc)]
    row += [desc.get(key) for key in (
        'bettingType', 'marketType', 'marketTime', 'suspendTime',
        'bspReconciled', 'complete', 'inPlay')]
    # str() means a missing regulator is stored as the text 'None'.
    row.append(str(desc.get('regulator')))
    row += [
        evt.get('venue'),
        evt.get('countryCode'),
        desc.get('discountAllowed'),
        evt.get('timezone'),
        evt.get('openDate'),
        evt.get('name'),
        market_cat_entry.get('marketName'),
    ]
    return row


def parse_runners(market_cat_entry):
    """Return one row per runner of a market-catalogue entry.

    Each row is ``[selectionId, runnerName, handicap, sortPriority,
    marketId]``; keys that are absent come back as ``None``. An entry
    without runners produces an empty list.
    """
    parent_market_id = market_cat_entry.get('marketId')
    return [
        [
            runner.get('selectionId'),
            runner.get('runnerName'),
            runner.get('handicap'),
            runner.get('sortPriority'),
            parent_market_id,
        ]
        for runner in market_cat_entry.get('runners', {})
    ]

# Column names for the rows produced by parse_market_details; the order must
# match the order of the list that function returns.
md_cols = [
    'market_id',
    'market_start_time',
    'bsp_market',
    'in_play_enabled',
    'persistence_enabled',
    'market_base_rate',
    'event_id',
    'event_type_id',
    'race_type',
    'number_of_winners',
    'betting_type',
    'market_type',
    'market_time',
    'suspend_time',
    'bsp_reconciled',
    'complete',
    'in_play',
    'regulator',
    'venue',
    'country_code',
    'discount_allowed',
    'timezone',
    'open_date',
    'event_name',
    'market_name'
]

# Column names for the rows produced by parse_runners, in the same order.
r_cols = ['runner_id', 'runner_name', 'handicap', 'sort_priority', 'market_id']

In [14]:
# Flatten the catalogue into one row per market and one row per runner, then
# stamp both frames with the time the catalogue was retrieved.
market_definitions = [parse_market_details(entry) for entry in market_catalogue]
runners = [
    runner_row
    for entry in market_catalogue
    for runner_row in parse_runners(entry)
]

market_definitions_df = pd.DataFrame(market_definitions, columns=md_cols)
runners_df = pd.DataFrame(runners, columns=r_cols)
for stamped_frame in (market_definitions_df, runners_df):
    stamped_frame['api_call_time_utc'] = market_info_time_utc

In [15]:
# add time to event in order to select best model
market_definitions_df['minutes_to_event'] = (
    pd.to_datetime(market_definitions_df['market_time']) - pd.to_datetime(market_definitions_df['api_call_time_utc'], utc=True)).dt.seconds/60

In [16]:
# Number of markets parsed from the catalogue.
len(market_definitions_df)

69

## Get odds for markets

In [17]:
def parse_market_book(market_book):
    """Flatten the market-level fields of a market book into a row.

    The values follow the order of ``mb_cols``; keys that are missing give
    ``None``. ``crossMatching`` is read twice (positions 14 and 17) so the
    row keeps the same width as ``mb_cols``.
    """
    book_keys = (
        'marketId',
        'isMarketDataDelayed',
        'status',
        'betDelay',
        'bspReconciled',
        'complete',
        'inplay',
        'numberOfWinners',
        'numberOfRunners',
        'numberOfActiveRunners',
        'lastMatchTime',
        'totalMatched',
        'totalAvailable',
        'crossMatching',
        'runnersVoidable',
        'version',
        'crossMatching',
    )
    return [market_book.get(key) for key in book_keys]

def parse_market_odds(market_book):
    """Return one odds row per runner of a market book.

    Each row holds, in ``odds_cols`` order: selection id, handicap, status,
    the runner's ``adjustmentFactor`` (stored in the ``sort_priority``
    slot), last traded price, total matched, the first three back prices,
    back sizes, lay prices and lay sizes (padded with ``None`` when fewer
    than three offers exist), and finally the market id.
    """
    def _first_three(offers, field):
        # Take `field` from at most three offers and pad up to length three.
        picked = [offer.get(field) for offer in offers[:3]]
        return picked + [None] * (3 - len(picked))

    parent_market_id = market_book.get('marketId')

    odds_rows = []
    for runner in market_book.get('runners', {}):
        back_offers = runner.get('ex', {}).get('availableToBack', [])
        lay_offers = runner.get('ex', {}).get('availableToLay', [])

        leading_fields = [
            runner.get('selectionId'),
            runner.get('handicap'),
            runner.get('status'),
            runner.get('adjustmentFactor'),
            runner.get('lastPriceTraded'),
            runner.get('totalMatched'),
        ]
        odds_rows.append(
            leading_fields
            + _first_three(back_offers, 'price')
            + _first_three(back_offers, 'size')
            + _first_three(lay_offers, 'price')
            + _first_three(lay_offers, 'size')
            + [parent_market_id]
        )

    return odds_rows

# Column names for the rows produced by parse_market_book, in the same order.
# NOTE(review): 'cross_matching' is listed twice (matching the two
# 'crossMatching' reads in parse_market_book), so the resulting DataFrame has
# two columns with that name.
mb_cols = [
    'market_id',
    'is_market_data_delayed',
    'market_status',
    'bet_delay',
    'bsp_reconciled',
    'complete',
    'inplay',
    'number_of_winners',
    'number_of_runners',
    'number_of_active_runners',
    'last_match_time',
    'total_matched',
    'total_available',
    'cross_matching',
    'runners_voidable',
    'version',
    'cross_matching'
]

# Column names for the rows produced by parse_market_odds, in the same order.
# 'sort_priority' holds the value parse_market_odds reads from
# 'adjustmentFactor'.
odds_cols = [
    'runner_id', 'handicap', 'status', 'sort_priority', 'ltp', 'total_matched',
    'back_price_1', 'back_price_2', 'back_price_3', 'back_size_1', 'back_size_2', 'back_size_3',
    'lay_price_1', 'lay_price_2', 'lay_price_3', 'lay_size_1', 'lay_size_2', 'lay_size_3',
    'market_id'
]

In [18]:
# Distinct market ids to request price data for.
markets = market_definitions_df['market_id'].unique().tolist()

In [19]:
# Request the best available back/lay offers for every market, one
# listMarketBook call per market id.

# `import urllib` alone does not guarantee the `request` submodule is loaded.
import urllib.request

market_books = []
for m in markets:

    # Serialise with json.dumps so the market id is always quoted correctly.
    prices_req = json.dumps({
        "jsonrpc": "2.0",
        "method": "SportsAPING/v1.0/listMarketBook",
        "params": {
            "marketIds": [m],
            "priceProjection": {"priceData": ["EX_BEST_OFFERS"]},
        },
        "id": 1,
    })
    req = urllib.request.Request(bet_url, data=prices_req.encode('utf-8'), headers=headers)
    # Close each HTTP response as soon as its body has been read.
    with urllib.request.urlopen(req) as prices_response:
        prices_response_json = prices_response.read()
    prices_result = json.loads(prices_response_json.decode('utf-8'))

    # Exactly one market id was requested, so take the first book returned.
    market_books.append(prices_result['result'][0])

In [20]:
# One market-level row per book and one odds row per runner across all books.
market_books_lists = [parse_market_book(book) for book in market_books]
market_odds_lists = [
    odds_row
    for book in market_books
    for odds_row in parse_market_odds(book)
]

In [21]:
# Wrap the parsed rows in DataFrames using the column lists defined above.
market_books_df = pd.DataFrame(data=market_books_lists, columns=mb_cols)
market_odds_df = pd.DataFrame(data=market_odds_lists, columns=odds_cols)

## Get into prediction data

#### Combine markets info

In [22]:
# in particular the event details from market definitions and the number of places from the market book

market_definition_columns = [
    'market_id', 'market_start_time', 'market_time', 'suspend_time', 'open_date', 'api_call_time_utc', 'minutes_to_event',
    'event_id', 'venue', 'event_name', 'race_type', 'market_name', 'market_type', 'event_type_id', 'betting_type', 'country_code', 'timezone',
    'bsp_market', 'in_play_enabled', 'persistence_enabled', 'market_base_rate', 'regulator', 'discount_allowed'
]

market_book_columns = [
    'market_id', 'number_of_winners', 'number_of_runners', 'number_of_active_runners',
    'last_match_time', 'total_matched', 'total_available', 'cross_matching', 'runners_voidable', 'version',
    'cross_matching', 'is_market_data_delayed', 'market_status', 'bet_delay', 'bsp_reconciled', 'complete', 'inplay'
]

md_len_check = len(market_definitions_df)
mb_len_check = len(market_definitions_df)

market_details_combined = market_definitions_df[market_definition_columns].merge(market_books_df[market_book_columns], how='left', on='market_id')

if md_len_check!=len(market_details_combined):
    raise StopIteration('Duplicate markets!')

if mb_len_check!=len(market_details_combined):
    raise StopIteration('Potentially missing some market details!')

#### Combine runners and odds data

In [23]:
# Attach each runner's odds row to its catalogue row, matching on runner and
# market id, then sanity-check the row counts.
runner_cols = [
    'runner_id', 'runner_name', 'market_id', 'api_call_time_utc'
]

market_odds_cols = [
    'runner_id', 'handicap', 'status', 'sort_priority', 'market_id', 'ltp', 'total_matched',
    'back_price_1', 'back_price_2', 'back_price_3', 'back_size_1', 'back_size_2', 'back_size_3',
    'lay_price_1', 'lay_price_2', 'lay_price_3', 'lay_size_1', 'lay_size_2', 'lay_size_3'
]

runner_len_check = len(runners_df)
odds_len_check = len(market_odds_df)

runners_and_odds = pd.merge(
    runners_df[runner_cols],
    market_odds_df[market_odds_cols],
    how='left',
    on=['runner_id', 'market_id'],
)

merged_runner_rows = len(runners_and_odds)

# More rows than runners means the join keys were not unique.
if merged_runner_rows != runner_len_check:
    raise StopIteration('Duplicate runners!')

# A different number of odds rows means the two sources disagree.
if merged_runner_rows != odds_len_check:
    raise StopIteration('Potentially missing runner or odds details!')

In [24]:
# Preview the combined market table.
market_details_combined.head(3)

Unnamed: 0,market_id,market_start_time,market_time,suspend_time,open_date,api_call_time_utc,minutes_to_event,event_id,venue,event_name,race_type,market_name,market_type,event_type_id,betting_type,country_code,timezone,bsp_market,in_play_enabled,persistence_enabled,market_base_rate,regulator,discount_allowed,number_of_winners,number_of_runners,number_of_active_runners,last_match_time,total_matched,total_available,cross_matching,cross_matching.1,runners_voidable,version,cross_matching.2,cross_matching.3,is_market_data_delayed,market_status,bet_delay,bsp_reconciled,complete,inplay
0,1.182991387,2021-05-06T15:00:00.000Z,2021-05-06T15:00:00.000Z,2021-05-06T15:00:00.000Z,2021-05-06T11:50:00.000Z,2021-05-06 14:52:12.446500,7.783333,30488657,Huntingdon,Huntingdon 6th May,Bumper,2m Mdn NHF,WIN,7,ODDS,GB,Europe/London,True,True,True,5.0,GIBRALTAR REGULATOR,False,1,10,8,2021-05-06T14:51:20.635Z,26834.21,735498.96,True,True,False,3796597324,True,True,True,OPEN,0,False,True,False
1,1.182991388,2021-05-06T15:00:00.000Z,2021-05-06T15:00:00.000Z,2021-05-06T15:00:00.000Z,2021-05-06T11:50:00.000Z,2021-05-06 14:52:12.446500,7.783333,30488657,Huntingdon,Huntingdon 6th May,Bumper,To Be Placed,PLACE,7,ODDS,GB,Europe/London,True,True,True,5.0,GIBRALTAR REGULATOR,False,3,10,8,2021-05-06T14:51:18.237Z,3081.03,12333.87,False,False,False,3796597326,False,False,True,OPEN,0,False,False,False
2,1.182991604,2021-05-06T15:07:00.000Z,2021-05-06T15:07:00.000Z,2021-05-06T15:07:00.000Z,2021-05-06T12:00:00.000Z,2021-05-06 14:52:12.446500,14.783333,30488686,Worcester,Worcester 6th May,Hurdle,To Be Placed,PLACE,7,ODDS,GB,Europe/London,True,True,True,5.0,GIBRALTAR REGULATOR,False,3,10,9,2021-05-06T14:50:58.716Z,1565.95,13273.63,True,True,False,3796605282,True,True,True,OPEN,0,False,False,False


In [25]:
# Preview the combined runner/odds table.
runners_and_odds.head(3)

Unnamed: 0,runner_id,runner_name,market_id,api_call_time_utc,handicap,status,sort_priority,ltp,total_matched,back_price_1,back_price_2,back_price_3,back_size_1,back_size_2,back_size_3,lay_price_1,lay_price_2,lay_price_3,lay_size_1,lay_size_2,lay_size_3
0,39371722,Storm Of Light,1.182991387,2021-05-06 14:52:12.446500,0.0,ACTIVE,25.0,4.2,0.0,4.2,4.1,4.0,238.9,158.53,292.62,4.3,4.4,4.5,205.16,107.04,121.89
1,163268,Call Of The Wild,1.182991387,2021-05-06 14:52:12.446500,0.0,ACTIVE,22.476,4.0,0.0,4.0,3.95,3.9,282.47,576.77,163.29,4.1,4.2,4.3,136.14,106.33,122.29
2,39371724,Urban Grit,1.182991387,2021-05-06 14:52:12.446500,0.0,ACTIVE,12.717,8.2,0.0,8.2,8.0,7.8,46.18,73.13,72.63,8.4,8.6,8.8,48.98,27.24,19.19


#### Pair up win and place markets

In [26]:
# Split the market table by type, then pair every WIN market with the PLACE
# market that shares its event id and market time.
is_place_market = market_details_combined['market_type'] == 'PLACE'
is_win_market = market_details_combined['market_type'] == 'WIN'
place_market_details = market_details_combined[is_place_market]
win_market_details = market_details_combined[is_win_market]

pairing_keys = ['event_id', 'market_time']
win_side = win_market_details[pairing_keys + ['market_id']].rename(columns={'market_id': 'market_id_win'})
place_side = place_market_details[pairing_keys + ['market_id']].rename(columns={'market_id': 'market_id_place'})
win_place_map = win_side.merge(place_side, how='left', on=pairing_keys)

# Tag every runner row with the type of the market it belongs to.
market_type_lookup = market_details_combined[['market_id', 'market_type']]
runners_and_odds = runners_and_odds.merge(market_type_lookup, how='left', on='market_id')

In [27]:
# add win odds to all runners data for ordering
runners_and_odds = runners_and_odds.merge(
    win_place_map[['market_id_win', 'market_id_place']].rename(columns={'market_id_place': 'market_id'}), how='left', on='market_id')
runners_and_odds = runners_and_odds.merge(
    win_place_map[['market_id_win', 'market_id_place']].rename(columns={'market_id_win': 'market_id'}), how='left', on='market_id')

runners_and_odds.loc[runners_and_odds['market_type']=='WIN', 'market_id_win'] = runners_and_odds.loc[runners_and_odds['market_type']=='WIN', 'market_id']
runners_and_odds.loc[runners_and_odds['market_type']=='PLACE', 'market_id_place'] = runners_and_odds.loc[runners_and_odds['market_type']=='PLACE', 'market_id']

In [28]:
# Give every row the best back price of the same runner in the WIN market
# (`win_odds_1`) by self-joining on runner id and WIN market id.
r_and_o_len_check = len(runners_and_odds)

win_price_lookup = runners_and_odds[['runner_id', 'market_id', 'back_price_1']].rename(
    columns={'market_id': 'market_id_win', 'back_price_1': 'win_odds_1'})
runners_and_odds = runners_and_odds.merge(
    win_price_lookup, how='left', on=['runner_id', 'market_id_win'])

# The join must not change the number of rows.
if len(runners_and_odds) != r_and_o_len_check:
    raise StopIteration('Merge problems!')

In [29]:
# Rank the runners inside each market by ascending win odds (0 = shortest).
runners_and_odds = runners_and_odds.sort_values(by='win_odds_1')
runners_by_market = runners_and_odds.groupby('market_id')
runners_and_odds['odds_order'] = runners_by_market.cumcount()

In [30]:
# One row per market, one column per odds rank, holding the win odds of the
# runner at that rank.
runners_and_odds_piv = pd.pivot_table(
    runners_and_odds,
    values='win_odds_1',
    index='market_id',
    columns='odds_order',
    aggfunc='mean',
    fill_value=None,
)
runners_and_odds_piv.columns = [f'odds_{rank}_win' for rank in runners_and_odds_piv.columns]

In [31]:
# Build the model input: one row per runner with its own win odds in
# `odds_horse_win` and the win odds of the other runners in the same market in
# consecutively numbered `odds_<n>_win` columns.
max_runners = max(market_details_combined['number_of_active_runners'])
# NOTE(review): this rebinds `odds_cols`, which earlier held the market-odds
# column names used to build `market_odds_df`.
odds_cols = ['odds_'+str(r)+'_win' for r in range(max_runners)]
prediction_data_list = []
for i in range(max_runners):
    # Runners ranked i-th by win odds in their market, with that market's
    # full set of ranked odds joined on.
    prediction_tmp = runners_and_odds[runners_and_odds['odds_order']==i]
    prediction_tmp = prediction_tmp.merge(runners_and_odds_piv, how='left', on='market_id')
    # Every odds column except the runner's own rank.
    odds_cols_tmp = odds_cols.copy()
    odds_cols_tmp.remove('odds_'+str(i)+'_win')
    
    # The runner's own rank column becomes `odds_horse_win`.
    prediction_tmp = prediction_tmp.rename(columns={'odds_'+str(i)+'_win': 'odds_horse_win'})
    prediction_data_cols = ['runner_id', 'runner_name', 'status', 'market_id', 'market_type', 'market_id_win', 'market_id_place', 'api_call_time_utc',
                            'handicap', 'ltp', 'total_matched', 'back_price_1', 'back_price_2', 'back_price_3', 'back_size_1', 'back_size_2', 'back_size_3',
                            'lay_price_1', 'lay_price_2', 'lay_price_3', 'lay_size_1', 'lay_size_2', 'lay_size_3',
                            'odds_horse_win']
    # NOTE(review): assumes the pivot contains every odds_0 .. odds_<max_runners-1>
    # column; a missing one would raise a KeyError here - confirm.
    prediction_tmp = prediction_tmp[prediction_data_cols + odds_cols_tmp]
    prediction_tmp.columns = prediction_data_cols + odds_cols[1:] # renaming odds names to have consecutive numbers
    prediction_data_list.append(prediction_tmp)

In [32]:
# Stack the per-rank frames row-wise into a single prediction table.
prediction_data_df = pd.concat(objs=prediction_data_list, axis=0)

In [33]:
# add on some of the market details

market_deets_cols = ['market_id', 'market_time', 'minutes_to_event', 'venue', 'event_name', 'race_type', 'market_name',
                     'number_of_winners', 'number_of_runners', 'number_of_active_runners']

prediction_data_df = prediction_data_df.merge(market_details_combined[market_deets_cols], how='left', on='market_id')

In [34]:
# From here on `number_of_runners` means the active-runner count; the original
# total is kept as `number_of_runners_orig`.
runner_count_renames = {
    'number_of_runners': 'number_of_runners_orig',
    'number_of_active_runners': 'number_of_runners',
}
prediction_data_df = prediction_data_df.rename(columns=runner_count_renames)

#### Check probabilities in reasonable ranges

In [35]:
# Implied probability of each runner from its best back price, and the sum of
# those probabilities per market (`p_sum`).
runners_and_odds['p_back_price_1'] = 1/runners_and_odds['back_price_1']
prob_sums = (
    runners_and_odds
    .groupby('market_id')['p_back_price_1']
    .sum()
    .reset_index()
    .rename(columns={'p_back_price_1': 'p_sum'})
)

In [36]:
# Attach the probability sum of the paired WIN market to every prediction row.
win_market_prob_sums = prob_sums.rename(columns={'market_id': 'market_id_win'})
prediction_data_df = prediction_data_df.merge(win_market_prob_sums, how='left', on='market_id_win')

#### Subset markets

In [37]:
# Keep only three-place PLACE markets with 8-15 runners whose WIN-market
# probability sum lies between 1.0 and 1.5 (both ranges inclusive).
number_winners = 3
runners_min = 8
runners_max = 15
win_p_sum_min = 1.0
win_p_sum_max = 1.5

is_place_row = prediction_data_df['market_type'] == 'PLACE'
has_wanted_places = prediction_data_df['number_of_winners'] == number_winners
runner_count_ok = prediction_data_df['number_of_runners'].between(runners_min, runners_max)
p_sum_ok = prediction_data_df['p_sum'].between(win_p_sum_min, win_p_sum_max)

prediction_data_df = prediction_data_df[is_place_row & has_wanted_places & runner_count_ok & p_sum_ok]

#### Make nulls 99999s to mimic training process

In [38]:
# Model features: runner count, the runner's own win odds, and the win odds of
# up to 14 other runners. Missing values are replaced by a large sentinel.
features = ['number_of_runners', 'odds_horse_win'] + [f'odds_{rank}_win' for rank in range(1, runners_max)]
default_odds = 99999
for f in features:
    filled_column = prediction_data_df[f].fillna(default_odds)
    prediction_data_df[f] = filled_column

#### Subset to data for each model based on the time to race

In [39]:
# Preview the prediction input.
prediction_data_df.head(3)

Unnamed: 0,runner_id,runner_name,status,market_id,market_type,market_id_win,market_id_place,api_call_time_utc,handicap,ltp,total_matched,back_price_1,back_price_2,back_price_3,back_size_1,back_size_2,back_size_3,lay_price_1,lay_price_2,lay_price_3,lay_size_1,lay_size_2,lay_size_3,odds_horse_win,odds_1_win,odds_2_win,odds_3_win,odds_4_win,odds_5_win,odds_6_win,odds_7_win,odds_8_win,odds_9_win,odds_10_win,odds_11_win,odds_12_win,odds_13_win,odds_14_win,odds_15_win,odds_16_win,odds_17_win,market_time,minutes_to_event,venue,event_name,race_type,market_name,number_of_winners,number_of_runners_orig,number_of_runners,p_sum
34,22478914,First Street,ACTIVE,1.182991393,PLACE,1.182991392,1.182991393,2021-05-06 14:52:12.446500,0.0,1.25,0.0,1.23,1.22,1.21,77.75,60.68,68.38,1.26,1.27,1.28,26.17,52.03,86.36,2.32,5.1,6.8,9.8,13.5,24.0,48.0,90.0,180.0,99999.0,99999.0,99999.0,99999.0,99999.0,99999.0,,,,2021-05-06T15:35:00.000Z,42.783333,Huntingdon,Huntingdon 6th May,Bumper,To Be Placed,3,10,9,1.029453
38,34003197,Cardboard Gangster,ACTIVE,1.182993292,PLACE,1.182993291,1.182993292,2021-05-06 14:52:12.446500,0.0,1.51,0.0,1.39,1.37,1.36,19.8,36.87,68.13,1.62,1.65,1.69,50.5,63.62,17.64,2.96,4.9,6.4,6.4,9.4,18.5,34.0,55.0,99999.0,99999.0,99999.0,99999.0,99999.0,99999.0,99999.0,,,,2021-05-06T17:30:00.000Z,157.783333,Tipperary,Tipperary 6th May,Chase,To Be Placed,3,8,8,1.06245
42,35808417,Jebel Dukhan,ACTIVE,1.182991819,PLACE,1.182991818,1.182991819,2021-05-06 14:52:12.446500,0.0,1.35,0.0,1.35,1.34,1.33,13.33,40.0,39.82,1.38,1.39,1.4,87.77,18.59,115.14,3.3,3.4,3.5,18.0,26.0,30.0,85.0,100.0,140.0,360.0,99999.0,99999.0,99999.0,99999.0,99999.0,,,,2021-05-06T15:50:00.000Z,57.783333,Chelmsford City,Chelmsford City 6th May,Flat,To Be Placed,3,11,10,1.041898


In [40]:
# Bucket each row by time to race (truncated to a multiple of 30 minutes) and
# score the 60/120/180/240/300 buckets with the model for that horizon.
prediction_data_df['minutes_to_event_rounded'] = 30*((prediction_data_df['minutes_to_event']/30).astype(int))


def _predict_for_horizon(frame, horizon, model, feature_names):
    """Return the rows of `frame` in the `horizon` bucket with `preds` and `pred_odds` added."""
    # Work on an explicit copy so the new columns never touch `frame`.
    subset = frame[frame['minutes_to_event_rounded']==horizon].copy()
    subset['preds'] = model.predict(xgb.DMatrix(subset[feature_names]))
    # Predicted probability expressed as decimal odds.
    subset['pred_odds'] = 1/subset['preds']
    return subset


prediction_data_60 = _predict_for_horizon(prediction_data_df, 60, betfair_places_model_60, features)
prediction_data_120 = _predict_for_horizon(prediction_data_df, 120, betfair_places_model_120, features)
prediction_data_180 = _predict_for_horizon(prediction_data_df, 180, betfair_places_model_180, features)
prediction_data_240 = _predict_for_horizon(prediction_data_df, 240, betfair_places_model_240, features)
prediction_data_300 = _predict_for_horizon(prediction_data_df, 300, betfair_places_model_300, features)

# Rows in any other bucket are not scored and do not appear in the output.
output_data = pd.concat([prediction_data_60, prediction_data_120, prediction_data_180, prediction_data_240, prediction_data_300], axis=0)

#### Betting constraints

In [49]:
# back bets
odds_margin_mult = 1.1
min_odds = 1
max_odds = 3

back_mask = (output_data['pred_odds']*odds_margin_mult<output_data['back_price_1']) & (output_data['back_price_1'].between(min_odds, max_odds))

output_data['back'] = 0
output_data.loc[back_mask, 'back'] = 1

In [50]:
# lay bets
lay_margin = 1.1
lay_odds_min = 0
lay_odds_max = 3

lay_mask = (output_data['pred_odds'] > output_data['lay_price_1']*lay_margin) & (output_data['lay_price_1'].between(lay_odds_min, lay_odds_max))

output_data['lay'] = 0
output_data.loc[lay_mask, 'lay'] = 1

In [51]:
# Rows flagged as back bets.
output_data[output_data['back']==1]

Unnamed: 0,runner_id,runner_name,status,market_id,market_type,market_id_win,market_id_place,api_call_time_utc,handicap,ltp,total_matched,back_price_1,back_price_2,back_price_3,back_size_1,back_size_2,back_size_3,lay_price_1,lay_price_2,lay_price_3,lay_size_1,lay_size_2,lay_size_3,odds_horse_win,odds_1_win,odds_2_win,odds_3_win,odds_4_win,odds_5_win,odds_6_win,odds_7_win,odds_8_win,odds_9_win,odds_10_win,odds_11_win,odds_12_win,odds_13_win,odds_14_win,odds_15_win,odds_16_win,odds_17_win,market_time,minutes_to_event,venue,event_name,race_type,market_name,number_of_winners,number_of_runners_orig,number_of_runners,p_sum,minutes_to_event_rounded,preds,pred_odds,back,lay


In [52]:
# Rows flagged as lay bets.
output_data[output_data['lay']==1]

Unnamed: 0,runner_id,runner_name,status,market_id,market_type,market_id_win,market_id_place,api_call_time_utc,handicap,ltp,total_matched,back_price_1,back_price_2,back_price_3,back_size_1,back_size_2,back_size_3,lay_price_1,lay_price_2,lay_price_3,lay_size_1,lay_size_2,lay_size_3,odds_horse_win,odds_1_win,odds_2_win,odds_3_win,odds_4_win,odds_5_win,odds_6_win,odds_7_win,odds_8_win,odds_9_win,odds_10_win,odds_11_win,odds_12_win,odds_13_win,odds_14_win,odds_15_win,odds_16_win,odds_17_win,market_time,minutes_to_event,venue,event_name,race_type,market_name,number_of_winners,number_of_runners_orig,number_of_runners,p_sum,minutes_to_event_rounded,preds,pred_odds,back,lay


#### Send outputs to google sheets/db

#### Finally, we will want to construct a loop that repeats the process automatically every hour

#### Other thoughts for productionizing:
* Make separate classes
* Store models more intelligently, perhaps as 1 dict and also with supplemental info like features, default values etc (although is some of this already in model artifact?)
* Probably need more checks and controls