# Betfair getting odds and placing bets

#### Useful links

* https://docs.developer.betfair.com/display/1smk3cen4v3lu3yomq5qye0ni/Betting+Enums#BettingEnums-MarketProjection for api method params


In [1]:
import requests
import urllib
import json
import pandas as pd
import numpy as np
import pickle
from tqdm import tqdm_notebook
import datetime
import importlib
import config
importlib.reload(config)
from config import username, password, application, dbpw
import logging

import matplotlib.pyplot as plt
import seaborn as sns

import pymysql
import sqlalchemy

import xgboost as xgb

In [2]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
pd.options.mode.chained_assignment = None

In [4]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

In [5]:
logging.basicConfig(filename='example.log', level=logging.INFO, format='%(asctime)s, %(levelname)s: %(message)s')

## Load models

In [6]:
# horses models
with open('/home/angus/projects/betting/tote/models/betfair_places_model_60.pkl', 'rb') as f:
    betfair_places_model_60 = pickle.load(f)

with open('/home/angus/projects/betting/tote/models/betfair_places_model_120.pkl', 'rb') as f:
    betfair_places_model_120 = pickle.load(f)
    
with open('/home/angus/projects/betting/tote/models/betfair_places_model_180.pkl', 'rb') as f:
    betfair_places_model_180 = pickle.load(f)
    
with open('/home/angus/projects/betting/tote/models/betfair_places_model_240.pkl', 'rb') as f:
    betfair_places_model_240 = pickle.load(f)
    
with open('/home/angus/projects/betting/tote/models/betfair_places_model_300.pkl', 'rb') as f:
    betfair_places_model_300 = pickle.load(f)

## Sign in

In [7]:
try:

    header = {'X-Application': application, 'Content-Type': 'application/x-www-form-urlencoded'}
    auth = 'username='+username+'&password='+password

    login = requests.post('https://identitysso-cert.betfair.com/api/certlogin',
                          cert=('/etc/ssl/client-2048.crt', '/etc/ssl/client-2048.key'),
                          headers=header, data=auth)
    
    login_success = login.json()['loginStatus']
    logging.info('Login '+str(login_success))

except Exception as error:
    login_status_code = login.status_code
    logging.error(error + 'Login error '+str(login_status_code))
    
    # after logging will want to either pass (continue with code below), continue (go back to start of loop perhaps with a pause first) or break

In [8]:
login.json()

{'sessionToken': '/f6P6/heO8pZdjaL5rk3wq/UVWP1ztm+ACqQuSgadAw=',
 'loginStatus': 'SUCCESS'}

In [9]:
login.status_code

200

In [10]:
ssoid = login.json()['sessionToken']

## Get event types

In [11]:
bet_url = "https://api.betfair.com/exchange/betting/json-rpc/v1"
event_req = '{"jsonrpc": "2.0", "method": "SportsAPING/v1.0/listEventTypes", "params": {"filter":{ }}, "id": 1}'
headers = {'X-Application': application, 'X-Authentication': ssoid, 'content-type': 'application/json'}
request = requests.post(bet_url, data=event_req.encode('utf-8'), headers=headers) 
request.json()

{'jsonrpc': '2.0',
 'result': [{'eventType': {'id': '1', 'name': 'Soccer'}, 'marketCount': 8502},
  {'eventType': {'id': '2', 'name': 'Tennis'}, 'marketCount': 2850},
  {'eventType': {'id': '3', 'name': 'Golf'}, 'marketCount': 18},
  {'eventType': {'id': '4', 'name': 'Cricket'}, 'marketCount': 96},
  {'eventType': {'id': '5', 'name': 'Rugby Union'}, 'marketCount': 88},
  {'eventType': {'id': '1477', 'name': 'Rugby League'}, 'marketCount': 99},
  {'eventType': {'id': '6', 'name': 'Boxing'}, 'marketCount': 17},
  {'eventType': {'id': '7', 'name': 'Horse Racing'}, 'marketCount': 517},
  {'eventType': {'id': '8', 'name': 'Motor Sport'}, 'marketCount': 4},
  {'eventType': {'id': '27454571', 'name': 'Esports'}, 'marketCount': 197},
  {'eventType': {'id': '10', 'name': 'Special Bets'}, 'marketCount': 29},
  {'eventType': {'id': '11', 'name': 'Cycling'}, 'marketCount': 24},
  {'eventType': {'id': '2152880', 'name': 'Gaelic Games'}, 'marketCount': 32},
  {'eventType': {'id': '3988', 'name': 'At

## Get specific HR markets (event id 7)

In [12]:
event_id = '["7"]'
countries = '["GB", "IE"]'
market_types = '["WIN", "PLACE"]'
market_start_time = (datetime.datetime.now() + datetime.timedelta(hours=-1)).strftime('%Y-%m-%dT%H:%M:%SZ')
market_end_time = (datetime.datetime.now() + datetime.timedelta(hours=24)).strftime('%Y-%m-%dT%H:%M:%SZ')
max_results = str(1000)
sort_type = 'FIRST_TO_START'
metadata = '["EVENT_TYPE", "COMPETITION", "EVENT", "MARKET_START_TIME", "MARKET_DESCRIPTION", "RUNNER_DESCRIPTION"]' #, "RUNNER_METADATA"]'
inplay = 'false'

user_req='{"jsonrpc": "2.0", "method": "SportsAPING/v1.0/listMarketCatalogue",\
           "params": {"filter":{"eventTypeIds":'+event_id+',"marketTypeCodes":'+market_types+',\
           "inPlayOnly":'+inplay+', "marketCountries":'+countries+',  \
           "marketStartTime":{"from":"'+market_start_time+'", "to":"'+market_end_time+'"}},\
           "sort":"'+sort_type+'", "maxResults":"'+max_results+'", "marketProjection":'+metadata+'}, "id": 1}'

# req = urllib.request.Request(bet_url, data=user_req.encode('utf-8'), headers=headers)
# response = urllib.request.urlopen(req)
# json_response = response.read()
# pkg = json_response.decode('utf-8')
# result = json.loads(pkg) 
# market_catalogue = result['result']
request = requests.post(bet_url, data=user_req.encode('utf-8'), headers=headers)
market_catalogue = request.json()['result']


In [13]:
market_info_time_utc = datetime.datetime.utcnow()

In [14]:
market_info_time_utc

datetime.datetime(2021, 5, 11, 7, 50, 13, 348473)

## Parse market details

In [15]:
def parse_places(description):
    if description.get('marketType', '')=='WIN':
        return 1
    elif 'Who will finish 1st or 2nd' in description.get('rules', ''):
        return 2
    elif 'Who will finish 1st, 2nd or 3rd' in description.get('rules', ''):
        return 3
    elif 'Who will finish 1st, 2nd, 3rd or 4th' in description.get('rules', ''):
        return 4
    else:
        return 'Unknown'

def parse_market_details(market_cat_entry):
    
    description = market_cat_entry.get('description', {})
    event = market_cat_entry.get('event', {})
    event_type = market_cat_entry.get('eventType', {})
    
    return [
        market_cat_entry.get('marketId', None),
        market_cat_entry.get('marketStartTime', None),
        description.get('bspMarket', None),
        description.get('turnInPlayEnabled', None),
        description.get('persistenceEnabled', None),
        description.get('marketBaseRate', None),
        event.get('id', None),
        event_type.get('id', None),
        description.get('raceType', None),
        parse_places(description),
        description.get('bettingType', None),
        description.get('marketType', None),
        description.get('marketTime', None),
        description.get('suspendTime', None),
        description.get('bspReconciled', None),
        description.get('complete', None),
        description.get('inPlay', None),
        str(description.get('regulator', None)),
        event.get('venue', None),
        event.get('countryCode', None),
        description.get('discountAllowed', None),
        event.get('timezone', None),
        event.get('openDate', None),
        event.get('name', None),
        market_cat_entry.get('marketName', None)
    ]


def parse_runners(market_cat_entry):

    market_id = market_cat_entry.get('marketId', None)
    
    runners = market_cat_entry.get('runners', {})
    runners_list = []
    for r in runners:
        r_id = r.get('selectionId', None)
        r_name = r.get('runnerName', None)
        handicap = r.get('handicap', None)
        sort_priority = r.get('sortPriority', None)
        runners_list.append([r_id, r_name, handicap, sort_priority, market_id])
    
    return runners_list

md_cols = [
    'market_id',
    'market_start_time',
    'bsp_market',
    'in_play_enabled',
    'persistence_enabled',
    'market_base_rate',
    'event_id',
    'event_type_id',
    'race_type',
    'number_of_winners',
    'betting_type',
    'market_type',
    'market_time',
    'suspend_time',
    'bsp_reconciled',
    'complete',
    'in_play',
    'regulator',
    'venue',
    'country_code',
    'discount_allowed',
    'timezone',
    'open_date',
    'event_name',
    'market_name'
]

r_cols = ['runner_id', 'runner_name', 'handicap', 'sort_priority', 'market_id']

In [16]:
market_definitions = []
runners = []
for m in market_catalogue:
    market_definitions.append(parse_market_details(m))
    runners += parse_runners(m)
    
market_definitions_df = pd.DataFrame(market_definitions, columns=md_cols)
market_definitions_df['api_call_time_utc'] = market_info_time_utc
runners_df = pd.DataFrame(runners, columns=r_cols)
runners_df['api_call_time_utc'] = market_info_time_utc

In [17]:
# add time to event in order to select appropriate model
market_definitions_df['minutes_to_event'] = (
    pd.to_datetime(market_definitions_df['market_time']) - pd.to_datetime(market_definitions_df['api_call_time_utc'], utc=True)).dt.seconds/60

In [18]:
len(market_definitions_df)

82

## Get odds for markets

In [19]:
def parse_market_book(market_book):
    
    return [
        market_book.get('marketId', None),
        market_book.get('isMarketDataDelayed', None),
        market_book.get('status', None),
        market_book.get('betDelay', None),
        market_book.get('bspReconciled', None),
        market_book.get('complete', None),
        market_book.get('inplay', None),
        market_book.get('numberOfWinners', None),
        market_book.get('numberOfRunners', None),
        market_book.get('numberOfActiveRunners', None),
        market_book.get('lastMatchTime', None),
        market_book.get('totalMatched', None),
        market_book.get('totalAvailable', None),
        market_book.get('crossMatching', None),
        market_book.get('runnersVoidable', None),
        market_book.get('version', None),
        market_book.get('crossMatching', None)
    ]

def parse_market_odds(market_book):
    
    market_id = market_book.get('marketId', None)
    
    runners = market_book.get('runners', {})
    runners_list = []
    for r in runners:
        r_id = r.get('selectionId', None)
        handicap = r.get('handicap', None)
        status = r.get('status', None)
        sort_priority = r.get('adjustmentFactor', None)
        ltp = r.get('lastPriceTraded', None)
        total_matched = r.get('totalMatched', None)
        
        ex_back = r.get('ex', {}).get('availableToBack', [])
        back_prices = [None, None, None]
        back_sizes = [None, None, None]
        for i, b in enumerate(ex_back[:3]):
            back_prices[i] = b.get('price', None)
            back_sizes[i] = b.get('size', None)
        
        ex_lay = r.get('ex', {}).get('availableToLay', [])
        lay_prices = [None, None, None]
        lay_sizes = [None, None, None]
        for i, l in enumerate(ex_lay[:3]):
            lay_prices[i] = l.get('price', None)
            lay_sizes[i] = l.get('size', None)
        
        runners_list.append([r_id, handicap, status, sort_priority, ltp, total_matched] + back_prices + back_sizes + lay_prices + lay_sizes + [market_id])    
    
    return runners_list

mb_cols = [
    'market_id',
    'is_market_data_delayed',
    'market_status',
    'bet_delay',
    'bsp_reconciled',
    'complete',
    'inplay',
    'number_of_winners',
    'number_of_runners',
    'number_of_active_runners',
    'last_match_time',
    'total_matched',
    'total_available',
    'cross_matching',
    'runners_voidable',
    'version',
    'cross_matching'
]

odds_cols = [
    'runner_id', 'handicap', 'status', 'sort_priority', 'ltp', 'total_matched',
    'back_price_1', 'back_price_2', 'back_price_3', 'back_size_1', 'back_size_2', 'back_size_3',
    'lay_price_1', 'lay_price_2', 'lay_price_3', 'lay_size_1', 'lay_size_2', 'lay_size_3',
    'market_id'
]

In [20]:
markets = list(market_definitions_df['market_id'].unique())

In [21]:
market_books = []
for m in markets:
    
    priceProjection = '["EX_BEST_OFFERS"]'
    prices_req = '{"jsonrpc": "2.0", "method": "SportsAPING/v1.0/listMarketBook", "params": {"marketIds": ["' + m + '"],"priceProjection":{"priceData":["EX_BEST_OFFERS"]}}, "id": 1}'
#     req = urllib.request.Request(bet_url, data=prices_req.encode('utf-8'), headers=headers)
#     prices_response = urllib.request.urlopen(req)
#     prices_response_json = prices_response.read()
#     prices_pkg = prices_response_json.decode('utf-8')
#     prices_result = json.loads(prices_pkg)
    request = requests.post(bet_url, data=prices_req.encode('utf-8'), headers=headers)
    prices_result = request.json()
    
    market_books.append(prices_result['result'][0])

In [22]:
market_books_lists = []
market_odds_lists = []
for m in market_books:
    market_books_lists.append(parse_market_book(m))
    market_odds_lists += parse_market_odds(m)

In [23]:
market_books_df = pd.DataFrame(market_books_lists, columns=mb_cols)
market_odds_df = pd.DataFrame(market_odds_lists, columns=odds_cols)

## Get into prediction data

#### Combine markets info

In [24]:
# in particular the event details from market definitions and the number of places from the market book

market_definition_columns = [
    'market_id', 'market_start_time', 'market_time', 'suspend_time', 'open_date', 'api_call_time_utc', 'minutes_to_event',
    'event_id', 'venue', 'event_name', 'race_type', 'market_name', 'market_type', 'event_type_id', 'betting_type', 'country_code', 'timezone',
    'bsp_market', 'in_play_enabled', 'persistence_enabled', 'market_base_rate', 'regulator', 'discount_allowed'
]

market_book_columns = [
    'market_id', 'number_of_winners', 'number_of_runners', 'number_of_active_runners',
    'last_match_time', 'total_matched', 'total_available', 'cross_matching', 'runners_voidable', 'version',
    'cross_matching', 'is_market_data_delayed', 'market_status', 'bet_delay', 'bsp_reconciled', 'complete', 'inplay'
]

md_len_check = len(market_definitions_df)
mb_len_check = len(market_definitions_df)

market_details_combined = market_definitions_df[market_definition_columns].merge(market_books_df[market_book_columns], how='left', on='market_id')

if md_len_check!=len(market_details_combined):
    raise StopIteration('Duplicate markets!')

if mb_len_check!=len(market_details_combined):
    raise StopIteration('Potentially missing some market details!')

#### Combine runners and odds data

In [25]:
runner_cols = [
    'runner_id', 'runner_name', 'market_id', 'api_call_time_utc'
]

market_odds_cols = [
    'runner_id', 'handicap', 'status', 'sort_priority', 'market_id', 'ltp', 'total_matched',
    'back_price_1', 'back_price_2', 'back_price_3', 'back_size_1', 'back_size_2', 'back_size_3',
    'lay_price_1', 'lay_price_2', 'lay_price_3', 'lay_size_1', 'lay_size_2', 'lay_size_3'
]

runner_len_check = len(runners_df)
odds_len_check = len(market_odds_df)

runners_and_odds = runners_df[runner_cols].merge(market_odds_df[market_odds_cols], how='left', on=['runner_id', 'market_id'])

if runner_len_check!=len(runners_and_odds):
    raise StopIteration('Duplicate runners!')
    
if odds_len_check!=len(runners_and_odds):
    raise StopIteration('Potentially missing runner or odds details!')

In [26]:
market_details_combined.head(3)

Unnamed: 0,market_id,market_start_time,market_time,suspend_time,open_date,api_call_time_utc,minutes_to_event,event_id,venue,event_name,race_type,market_name,market_type,event_type_id,betting_type,country_code,timezone,bsp_market,in_play_enabled,persistence_enabled,market_base_rate,regulator,discount_allowed,number_of_winners,number_of_runners,number_of_active_runners,last_match_time,total_matched,total_available,cross_matching,cross_matching.1,runners_voidable,version,cross_matching.2,cross_matching.3,is_market_data_delayed,market_status,bet_delay,bsp_reconciled,complete,inplay
0,1.18319245,2021-05-11T12:00:00.000Z,2021-05-11T12:00:00.000Z,2021-05-11T12:00:00.000Z,2021-05-11T12:00:00.000Z,2021-05-11 07:50:13.348473,249.766667,30508677,Sedgefield,Sedgefield 11th May,Hurdle,2m1f Nov Hrd,WIN,7,ODDS,GB,Europe/London,True,True,True,5.0,GIBRALTAR REGULATOR,False,1,10,9,2021-05-11T07:47:17.116Z,6144.74,526106.85,False,False,False,3808081127,False,False,True,OPEN,0,False,True,False
1,1.183192451,2021-05-11T12:00:00.000Z,2021-05-11T12:00:00.000Z,2021-05-11T12:00:00.000Z,2021-05-11T12:00:00.000Z,2021-05-11 07:50:13.348473,249.766667,30508677,Sedgefield,Sedgefield 11th May,Hurdle,To Be Placed,PLACE,7,ODDS,GB,Europe/London,True,True,True,5.0,GIBRALTAR REGULATOR,False,3,9,9,2021-05-11T07:47:01.096Z,495.74,14547.53,False,False,False,3808081121,False,False,True,OPEN,0,False,False,False
2,1.183193225,2021-05-11T12:30:00.000Z,2021-05-11T12:30:00.000Z,2021-05-11T12:30:00.000Z,2021-05-11T12:30:00.000Z,2021-05-11 07:50:13.348473,279.766667,30508794,Roscommon,Roscommon 11th May,Flat,7f Mdn,WIN,7,ODDS,IE,Europe/London,True,True,True,5.0,GIBRALTAR REGULATOR,False,1,9,9,2021-05-11T07:46:29.480Z,1197.98,467614.15,False,False,False,3808082791,False,False,True,OPEN,0,False,True,False


In [27]:
runners_and_odds.head(3)

Unnamed: 0,runner_id,runner_name,market_id,api_call_time_utc,handicap,status,sort_priority,ltp,total_matched,back_price_1,back_price_2,back_price_3,back_size_1,back_size_2,back_size_3,lay_price_1,lay_price_2,lay_price_3,lay_size_1,lay_size_2,lay_size_3
0,38130464,Geromino,1.18319245,2021-05-11 07:50:13.348473,0.0,ACTIVE,27.208,2.7,0.0,2.62,2.6,2.56,10.91,12.42,19.77,2.7,2.72,2.8,10.14,10.01,93.15
1,35872767,Gordons Jet,1.18319245,2021-05-11 07:50:13.348473,0.0,ACTIVE,29.187,3.9,0.0,3.75,3.6,3.55,19.33,16.16,11.12,3.95,4.2,4.5,11.16,32.28,59.81
2,4872811,Zoran,1.18319245,2021-05-11 07:50:13.348473,0.0,ACTIVE,23.472,5.1,0.0,5.0,4.8,4.4,15.92,29.53,57.72,5.4,5.9,6.0,15.91,13.24,49.3


#### Pair up win and place markets

In [28]:
place_market_details = market_details_combined[market_details_combined['market_type']=='PLACE']
win_market_details = market_details_combined[market_details_combined['market_type']=='WIN']

win_place_map = win_market_details[['event_id', 'market_time', 'market_id']].rename(columns={'market_id': 'market_id_win'}).merge(
    place_market_details[['event_id', 'market_time', 'market_id']].rename(columns={'market_id': 'market_id_place'}),
    how='left', on=['event_id', 'market_time'])

runners_and_odds = runners_and_odds.merge(market_details_combined[['market_id', 'market_type']], how='left', on='market_id')

In [29]:
# add win odds to all runners data for ordering
runners_and_odds = runners_and_odds.merge(
    win_place_map[['market_id_win', 'market_id_place']].rename(columns={'market_id_place': 'market_id'}), how='left', on='market_id')
runners_and_odds = runners_and_odds.merge(
    win_place_map[['market_id_win', 'market_id_place']].rename(columns={'market_id_win': 'market_id'}), how='left', on='market_id')

runners_and_odds.loc[runners_and_odds['market_type']=='WIN', 'market_id_win'] = runners_and_odds.loc[runners_and_odds['market_type']=='WIN', 'market_id']
runners_and_odds.loc[runners_and_odds['market_type']=='PLACE', 'market_id_place'] = runners_and_odds.loc[runners_and_odds['market_type']=='PLACE', 'market_id']

In [30]:
r_and_o_len_check = len(runners_and_odds)

runners_and_odds = runners_and_odds.merge(
    runners_and_odds[['runner_id', 'market_id', 'back_price_1']].rename(columns={'market_id': 'market_id_win', 'back_price_1': 'win_odds_1'}),
    how='left', on=['runner_id', 'market_id_win'])

if r_and_o_len_check!=len(runners_and_odds):
    raise StopIteration('Merge problems!')

In [31]:
runners_and_odds = runners_and_odds.sort_values('win_odds_1')
runners_and_odds['odds_order'] = runners_and_odds.groupby('market_id').cumcount()

In [32]:
runners_and_odds_piv = runners_and_odds.pivot_table(
    values='win_odds_1',
    index='market_id',
    columns='odds_order',
    aggfunc='mean',
    fill_value=None
)
runners_and_odds_piv.columns = ['odds_'+str(c)+'_win' for c in runners_and_odds_piv.columns]

In [33]:
max_runners = max(market_details_combined['number_of_active_runners'])
odds_cols = ['odds_'+str(r)+'_win' for r in range(max_runners)]
prediction_data_list = []
for i in range(max_runners):
    prediction_tmp = runners_and_odds[runners_and_odds['odds_order']==i]
    prediction_tmp = prediction_tmp.merge(runners_and_odds_piv, how='left', on='market_id')
    odds_cols_tmp = odds_cols.copy()
    odds_cols_tmp.remove('odds_'+str(i)+'_win')
    
    prediction_tmp = prediction_tmp.rename(columns={'odds_'+str(i)+'_win': 'odds_horse_win'})
    prediction_data_cols = ['runner_id', 'runner_name', 'status', 'market_id', 'market_type', 'market_id_win', 'market_id_place', 'api_call_time_utc',
                            'handicap', 'ltp', 'total_matched', 'back_price_1', 'back_price_2', 'back_price_3', 'back_size_1', 'back_size_2', 'back_size_3',
                            'lay_price_1', 'lay_price_2', 'lay_price_3', 'lay_size_1', 'lay_size_2', 'lay_size_3',
                            'odds_horse_win']
    prediction_tmp = prediction_tmp[prediction_data_cols + odds_cols_tmp]
    prediction_tmp.columns = prediction_data_cols + odds_cols[1:] # renaming odds names to have consecutive numbers
    prediction_data_list.append(prediction_tmp)

In [34]:
prediction_data_df = pd.concat(prediction_data_list, axis=0)

In [35]:
# add on some of the market details

market_deets_cols = ['market_id', 'market_time', 'minutes_to_event', 'venue', 'event_name', 'race_type', 'market_name',
                     'number_of_winners', 'number_of_runners', 'number_of_active_runners', 'version']

prediction_data_df = prediction_data_df.merge(market_details_combined[market_deets_cols], how='left', on='market_id')

In [36]:
prediction_data_df = prediction_data_df.rename(columns={'number_of_runners': 'number_of_runners_orig', 'number_of_active_runners': 'number_of_runners'})

#### Check probabilities in reasonable ranges

In [37]:
runners_and_odds['p_back_price_1'] = 1/runners_and_odds['back_price_1']
prob_sums = runners_and_odds.groupby('market_id')['p_back_price_1'].sum().reset_index().rename(columns={'p_back_price_1': 'p_sum'})

In [38]:
prediction_data_df = prediction_data_df.merge(prob_sums.rename(columns={'market_id': 'market_id_win'}), how='left', on='market_id_win')

#### Subset markets

In [39]:
number_winners = 3
runners_min = 8
runners_max = 15
win_p_sum_min = 0.95
win_p_sum_max = 1.5

prediction_data_df = prediction_data_df[
    (prediction_data_df['market_type']=='PLACE') &
    (prediction_data_df['number_of_winners']==number_winners) &
    (prediction_data_df['number_of_runners'].between(runners_min, runners_max)) &
    (prediction_data_df['p_sum'].between(win_p_sum_min, win_p_sum_max))]

#### Make nulls 99999s to mimic training process

In [40]:
features = ['number_of_runners', 'odds_horse_win'] + ['odds_'+str(i+1)+'_win' for i in range(runners_max-1)]
default_odds = 99999
for f in features:
    prediction_data_df[f] = prediction_data_df[f].fillna(default_odds)

#### Subset to data for each model based on the time to race

In [41]:
prediction_data_df.head(3)

Unnamed: 0,runner_id,runner_name,status,market_id,market_type,market_id_win,market_id_place,api_call_time_utc,handicap,ltp,total_matched,back_price_1,back_price_2,back_price_3,back_size_1,back_size_2,back_size_3,lay_price_1,lay_price_2,lay_price_3,lay_size_1,lay_size_2,lay_size_3,odds_horse_win,odds_1_win,odds_2_win,odds_3_win,odds_4_win,odds_5_win,odds_6_win,odds_7_win,odds_8_win,odds_9_win,odds_10_win,odds_11_win,odds_12_win,odds_13_win,odds_14_win,odds_15_win,odds_16_win,odds_17_win,market_time,minutes_to_event,venue,event_name,race_type,market_name,number_of_winners,number_of_runners_orig,number_of_runners,version,p_sum
20,51432,Dream With Me,ACTIVE,1.183193012,PLACE,1.183193011,1.183193012,2021-05-11 07:50:13.348473,0.0,1.19,0.0,1.2,1.19,1.06,44.32,28.58,14.0,1.52,1.53,,17.57,2.0,,2.42,3.0,9.4,12.0,14.0,14.0,23.0,28.0,99999.0,99999.0,99999.0,99999.0,99999.0,99999.0,99999.0,,,,2021-05-11T19:05:00.000Z,674.766667,Ayr,Ayr 11th May,Flat,To Be Placed,3,8,8,3808082905,1.158322
24,12936612,Zamarkhan,ACTIVE,1.183192461,PLACE,1.18319246,1.183192461,2021-05-11 07:50:13.348473,0.0,1.47,0.0,1.24,1.15,1.09,48.9,33.36,12.2,1.44,980.0,,46.9,9.0,,2.46,2.88,11.0,12.0,13.5,14.5,16.5,23.0,46.0,99999.0,99999.0,99999.0,99999.0,99999.0,99999.0,,,,2021-05-11T13:07:00.000Z,316.766667,Sedgefield,Sedgefield 11th May,Hurdle,To Be Placed,3,9,9,3808076753,1.196832
26,39013026,Cool And Clever,ACTIVE,1.183193226,PLACE,1.183193225,1.183193226,2021-05-11 07:50:13.348473,0.0,1.17,0.0,1.29,1.24,1.15,26.31,17.37,18.7,1.57,,,17.96,,,2.52,3.1,10.0,10.5,13.5,13.5,16.0,16.0,38.0,99999.0,99999.0,99999.0,99999.0,99999.0,99999.0,,,,2021-05-11T12:30:00.000Z,279.766667,Roscommon,Roscommon 11th May,Flat,To Be Placed,3,9,9,3808082781,1.214108


In [42]:
prediction_data_df['minutes_to_event_rounded'] = 30*((prediction_data_df['minutes_to_event']/30).astype(int))

prediction_data_60 = prediction_data_df[prediction_data_df['minutes_to_event_rounded']==60]
d60 = xgb.DMatrix(prediction_data_60[features])
prediction_data_60['preds'] = betfair_places_model_60.predict(d60)
prediction_data_60['pred_odds'] = 1/prediction_data_60['preds']

prediction_data_120 = prediction_data_df[prediction_data_df['minutes_to_event_rounded']==120]
d120 = xgb.DMatrix(prediction_data_120[features])
prediction_data_120['preds'] = betfair_places_model_120.predict(d120)
prediction_data_120['pred_odds'] = 1/prediction_data_120['preds']

prediction_data_180 = prediction_data_df[prediction_data_df['minutes_to_event_rounded']==180]
d180 = xgb.DMatrix(prediction_data_180[features])
prediction_data_180['preds'] = betfair_places_model_180.predict(d180)
prediction_data_180['pred_odds'] = 1/prediction_data_180['preds']

prediction_data_240 = prediction_data_df[prediction_data_df['minutes_to_event_rounded']==240]
d240 = xgb.DMatrix(prediction_data_240[features])
prediction_data_240['preds'] = betfair_places_model_240.predict(d240)
prediction_data_240['pred_odds'] = 1/prediction_data_240['preds']

prediction_data_300 = prediction_data_df[prediction_data_df['minutes_to_event_rounded']==300]
d300 = xgb.DMatrix(prediction_data_300[features])
prediction_data_300['preds'] = betfair_places_model_300.predict(d300)
prediction_data_300['pred_odds'] = 1/prediction_data_300['preds']

output_data = pd.concat([prediction_data_60, prediction_data_120, prediction_data_180, prediction_data_240, prediction_data_300], axis=0)

#### Betting constraints

In [43]:
# back bets
odds_margin_mult = 1.1
min_odds = 1
max_odds = 5

back_mask = (output_data['pred_odds']*odds_margin_mult<output_data['back_price_1']) & (output_data['back_price_1'].between(min_odds, max_odds))

output_data['back'] = 0
output_data.loc[back_mask, 'back'] = 1
back_bets = output_data[output_data['back']==1]

In [44]:
# lay bets
lay_margin = 1.1
lay_odds_min = 0
lay_odds_max = 3

lay_mask = (output_data['pred_odds'] > output_data['lay_price_1']*lay_margin) & (output_data['lay_price_1'].between(lay_odds_min, lay_odds_max))

output_data['lay'] = 0
output_data.loc[lay_mask, 'lay'] = 1
lay_bets = output_data[output_data['lay']==1]

In [45]:
back_bets

Unnamed: 0,runner_id,runner_name,status,market_id,market_type,market_id_win,market_id_place,api_call_time_utc,handicap,ltp,total_matched,back_price_1,back_price_2,back_price_3,back_size_1,back_size_2,back_size_3,lay_price_1,lay_price_2,lay_price_3,lay_size_1,lay_size_2,lay_size_3,odds_horse_win,odds_1_win,odds_2_win,odds_3_win,odds_4_win,odds_5_win,odds_6_win,odds_7_win,odds_8_win,odds_9_win,odds_10_win,odds_11_win,odds_12_win,odds_13_win,odds_14_win,odds_15_win,odds_16_win,odds_17_win,market_time,minutes_to_event,venue,event_name,race_type,market_name,number_of_winners,number_of_runners_orig,number_of_runners,version,p_sum,minutes_to_event_rounded,preds,pred_odds,back


In [46]:
lay_bets

Unnamed: 0,runner_id,runner_name,status,market_id,market_type,market_id_win,market_id_place,api_call_time_utc,handicap,ltp,total_matched,back_price_1,back_price_2,back_price_3,back_size_1,back_size_2,back_size_3,lay_price_1,lay_price_2,lay_price_3,lay_size_1,lay_size_2,lay_size_3,odds_horse_win,odds_1_win,odds_2_win,odds_3_win,odds_4_win,odds_5_win,odds_6_win,odds_7_win,odds_8_win,odds_9_win,odds_10_win,odds_11_win,odds_12_win,odds_13_win,odds_14_win,odds_15_win,odds_16_win,odds_17_win,market_time,minutes_to_event,venue,event_name,race_type,market_name,number_of_winners,number_of_runners_orig,number_of_runners,version,p_sum,minutes_to_event_rounded,preds,pred_odds,back,lay


In [47]:
output_data.shape

(18, 57)

In [48]:
breakhere

NameError: name 'breakhere' is not defined

# PAST BET OUTCOMES (ALTHOUGH THIS SHOULD ACTUALLY BE SEPARATE PROCESS), SETUP PROD LOOP WITH LOGGING

#### Place bets

In [None]:
# max_bet = 2

# i = back_bets.index[0]

# market_id = str(back_bets.at[i, 'market_id'])
# selection_id = str(back_bets.at[i, 'runner_id'])
# available = back_bets.at[i, 'back_size_1']
# bet_size = str(min(available, max_bet))
# price = str(back_bets.at[i, 'back_price_1'])
# min_fill_size = str(2)
# market_version = str(back_bets.at[i, 'version'])

# order_request = '{"jsonrpc": "2.0", "method": "SportsAPING/v1.0/placeOrders",\
#                 "params": {"marketId":"' + market_id + '","instructions":[\
#                 {"selectionId":"' + selection_id + '","handicap":"0","side":"BACK","orderType":"LIMIT",\
#                 "limitOrder":{"size":"' + bet_size + '","price":"' + price + '","persistenceType":"LAPSE",\
#                 "timeInForce":"FILL_OR_KILL", "minFillSize":"' + min_fill_size + '"}}], "marketVersion":{"version":"' + market_version + '"}}, "id": 1}'

# request = requests.post(bet_url, data=order_request.encode('utf-8'), headers=headers)
# order_result = request.json()['result']


In [None]:
# order_result

In [None]:
success_example = {
  'status': 'SUCCESS',
  'marketId': '1.183114392',
  'instructionReports': [{'status': 'SUCCESS',
    'instruction': {'selectionId': 35589999,
     'handicap': 0.0,
     'limitOrder': {'size': 2.0, 'price': 2.46, 'timeInForce': 'FILL_OR_KILL'},
     'orderType': 'LIMIT',
     'side': 'BACK'},
    'betId': '232794952924',
    'placedDate': '2021-05-09T14:47:17.000Z',
    'averagePriceMatched': 2.48,
    'sizeMatched': 2.0,
    'orderStatus': 'EXECUTION_COMPLETE'}]}

In [None]:
failed_example = {
  'status': 'SUCCESS',
  'marketId': '1.183103768',
  'instructionReports': [{'status': 'SUCCESS',
    'instruction': {'selectionId': 7172117,
     'handicap': 0.0,
     'limitOrder': {'size': 2.0, 'price': 6.6, 'timeInForce': 'FILL_OR_KILL'},
     'orderType': 'LIMIT',
     'side': 'BACK'},
    'betId': '232794114830',
    'placedDate': '2021-05-09T14:43:02.000Z',
    'averagePriceMatched': 0.0,
    'sizeMatched': 0.0,
    'orderStatus': 'EXPIRED'}]}

In [None]:
duplicate_example = {
  'status': 'FAILURE',
  'errorCode': 'DUPLICATE_TRANSACTION',
  'marketId': '1.183114392',
  'instructionReports': [{'status': 'FAILURE',
    'errorCode': 'ERROR_IN_ORDER',
    'instruction': {'selectionId': 35589999,
     'handicap': 0.0,
     'limitOrder': {'size': 2.0, 'price': 2.46, 'timeInForce': 'FILL_OR_KILL'},
     'orderType': 'LIMIT',
     'side': 'BACK'}}]}

In [None]:
def parse_order_result(order_result):
    instruction_report = order_result.get('instructionReports', [{}])[0]
    instruction = instruction_report.get('instruction', {})
    limit_order = instruction.get('limitOrder', {})
    
    return [
        order_result.get('status', None),
        order_result.get('marketId', None),
        instruction.get('selectionId', None),
        instruction.get('handicap', None),
        limit_order.get('size', None),
        limit_order.get('price', None),
        limit_order.get('timeInForce', None),
        limit_order.get('minFillSize', None),
        instruction.get('orderType', None),
        instruction.get('side', None),
        instruction_report.get('errorCode', None),
        instruction_report.get('betId', None),
        instruction_report.get('placedDate', None),
        instruction_report.get('averagePriceMatched', None),
        instruction_report.get('sizeMatched', None),
        instruction_report.get('orderStatus', None)
    ]
    
order_cols = ['status', 'market_id', 'selection_id', 'handicap', 'size', 'price', 'time_in_force', 'min_fill_size',
              'order_type', 'side', 'error_code', 'bet_id', 'placed_date', 'average_price_matched', 'size_matched', 'order_status']

In [None]:
# max_bet = 2

# i = lay_bets.index[0]

# market_id = str(lay_bets.at[i, 'market_id'])
# selection_id = str(lay_bets.at[i, 'runner_id'])
# available = lay_bets.at[i, 'lay_size_1']
# bet_size = str(min(available, max_bet))
# price = str(lay_bets.at[i, 'lay_price_1'])
# min_fill_size = str(2)
# market_version = str(lay_bets.at[i, 'version'])

# order_request = '{"jsonrpc": "2.0", "method": "SportsAPING/v1.0/placeOrders",\
#                 "params": {"marketId":"' + market_id + '","instructions":[\
#                 {"selectionId":"' + selection_id + '","handicap":"0","side":"LAY","orderType":"LIMIT",\
#                 "limitOrder":{"size":"' + bet_size + '","price":"' + price + '","persistenceType":"LAPSE",\
#                 "timeInForce":"FILL_OR_KILL", "minFillSize":"' + min_fill_size + '"}}], "marketVersion":{"version":"' + market_version + '"}}, "id": 1}'

# request = requests.post(bet_url, data=order_request.encode('utf-8'), headers=headers)
# order_result = request.json()['result']


In [None]:
# order_result

In [None]:
order_results = []
order_fails = []

In [None]:
max_bet = 2

for i in back_bets.index:
    market_id = str(back_bets.at[i, 'market_id'])
    selection_id = str(back_bets.at[i, 'runner_id'])
    available = back_bets.at[i, 'back_size_1']
    bet_size = str(min(available, max_bet))
    price = str(back_bets.at[i, 'back_price_1'])
    min_fill_size = str(2)
    market_version = str(back_bets.at[i, 'version'])
    
    try:
        order_request = '{"jsonrpc": "2.0", "method": "SportsAPING/v1.0/placeOrders",\
                    "params": {"marketId":"' + market_id + '","instructions":[\
                    {"selectionId":"' + selection_id + '","handicap":"0","side":"BACK","orderType":"LIMIT",\
                    "limitOrder":{"size":"' + bet_size + '","price":"' + price + '","persistenceType":"LAPSE",\
                    "timeInForce":"FILL_OR_KILL", "minFillSize":"' + min_fill_size + '"}}], "marketVersion":{"version":"' + market_version + '"}}, "id": 1}'
        request = requests.post(bet_url, data=order_request.encode('utf-8'), headers=headers)
        order_result = request.json()['result']
        order_results.append(order_result)
    except:
        order_fails.append([market_id, selection_id, available, bet_size, price, min_fill_size, market_version])


In [None]:
max_bet = 2

for i in lay_bets.index:
    market_id = str(lay_bets.at[i, 'market_id'])
    selection_id = str(lay_bets.at[i, 'runner_id'])
    available = lay_bets.at[i, 'lay_size_1']
    bet_size = str(min(available, max_bet))
    price = str(lay_bets.at[i, 'lay_price_1'])
    min_fill_size = str(2)
    market_version = str(lay_bets.at[i, 'version'])
    
    try:
        order_request = '{"jsonrpc": "2.0", "method": "SportsAPING/v1.0/placeOrders",\
                    "params": {"marketId":"' + market_id + '","instructions":[\
                    {"selectionId":"' + selection_id + '","handicap":"0","side":"LAY","orderType":"LIMIT",\
                    "limitOrder":{"size":"' + bet_size + '","price":"' + price + '","persistenceType":"LAPSE",\
                    "timeInForce":"FILL_OR_KILL", "minFillSize":"' + min_fill_size + '"}}], "marketVersion":{"version":"' + market_version + '"}}, "id": 1}'
        request = requests.post(bet_url, data=order_request.encode('utf-8'), headers=headers)
        order_result = request.json()['result']
        order_results.append(order_result)
    except:
        order_fails.append([market_id, selection_id, available, bet_size, price, min_fill_size, market_version])


In [None]:
order_results_df = []
for o in order_results:
    order_results_df.append(parse_order_result(o))
order_results_df = pd.DataFrame(order_results_df, columns=order_cols)

order_fails_df = pd.DataFrame(order_fails, columns=['market_id', 'selection_id', 'available', 'bet_size', 'price', 'min_fill_size', 'market_version'])

In [None]:
order_results_df

#### Send all data to db

In [None]:
connect_string = 'mysql+pymysql://root:'+dbpw+'@localhost/betfair'
sql_engine = sqlalchemy.create_engine(connect_string)

In [None]:
market_details_combined.to_sql(name='market_details_combined_live', con=sql_engine, schema='betfair', if_exists='append', index=False)
runners_and_odds.to_sql(name='runners_and_odds_live', con=sql_engine, schema='betfair', if_exists='append', index=False)
output_data.to_sql(name='predictions_output_live', con=sql_engine, schema='betfair', if_exists='append', index=False)
order_results_df.to_sql(name='order_results_live', con=sql_engine, schema='betfair', if_exists='append', index=False)
order_fails_df.to_sql(name='order_fails_live', con=sql_engine, schema='betfair', if_exists='append', index=False)

#### Other thoughts for productionizing:
* Make separate classes
* Store models more intelligently, perhaps as 1 dict and also with supplemental info like features, default values etc (although is some of this already in model artifact?)
* Probably need more checks and controls