In [96]:
from sqlalchemy import create_engine
from operator import itemgetter
from itertools import permutations
import json
import math
import time
import random

import pandas as pd
import numpy as np
#import seaborn as sns
from sklearn import linear_model
from skopt import gp_minimize
import scipy as sp
import matplotlib.pyplot as plt
%matplotlib inline

np.set_printoptions(precision=6, suppress=True)

In [33]:
race_type = 'R'
# category = 'G'
# category = 'H'

bet_type = 'E'
# bet_type = 'Q'

r = 2

In [11]:
engine = create_engine('sqlite:///../../data/exotic.db')
conn = engine.connect()

dfo = pd.read_sql_table('exotic', conn)
print('{} exotics loaded!'.format(len(dfo)))

386174 exotics loaded!


In [12]:
df = dfo.loc[dfo['race_type'].isin([race_type])]
print('{} {} races for {} betting!'.format(len(df), race_type, bet_type))

386174 R races for E betting!


In [13]:
df.tail(10)

Unnamed: 0,id,race_id,runner_numbers,race_type,bet_type,res1,res2,res3,res4,num_runners,...,run4_num,run4_win_perc,run4_win_scaled,run4_win_rank,run4_place_perc,run4_place_scaled,run4_place_rank,prediction,success,dividend
386164,386165,47713,24738516,R,E,3,4,7,2,8.0,...,,,,,,,,0.01379,0,27.4
386165,386166,47713,24738516,R,E,3,4,7,2,8.0,...,,,,,,,,0.009348,0,27.4
386166,386167,47713,24738516,R,E,3,4,7,2,8.0,...,,,,,,,,0.008661,0,27.4
386167,386168,47713,24738516,R,E,3,4,7,2,8.0,...,,,,,,,,0.004337,0,27.4
386168,386169,47713,24738516,R,E,3,4,7,2,8.0,...,,,,,,,,0.002519,0,27.4
386169,386170,47713,24738516,R,E,3,4,7,2,8.0,...,,,,,,,,0.002019,0,27.4
386170,386171,47713,24738516,R,E,3,4,7,2,8.0,...,,,,,,,,0.001461,0,27.4
386171,386172,47713,24738516,R,E,3,4,7,2,8.0,...,,,,,,,,0.001101,0,27.4
386172,386173,47713,24738516,R,E,3,4,7,2,8.0,...,,,,,,,,0.000774,0,27.4
386173,386174,47713,24738516,R,E,3,4,7,2,8.0,...,,,,,,,,0.000298,0,27.4


In [98]:
# bet size
balance = 1000
bet_chunk = balance * 0.01
grouped = df.groupby('race_id')

In [112]:
def bet_combinations(combs, r, x):
    """create prediction box"""

#     print(combs)
    runner_numbers = combs[0]['runner_numbers'].split(',')
#     print(runner_numbers)

    # start betting on all and cut off worse runner till positive outcome
    for box_size in range(len(runner_numbers), r, -1):

        # recreate smaller pool
        pool = runner_numbers[:box_size]
        pool = list(map(int, pool))
#         print('pool is {} from {} box size'.format(len(pool), box_size))
        
        # get all combs for pool
        subset = [c for c in combs if c['run1_num'] in pool and c['run2_num'] in pool]
#         print('subset of {} combs'.format(len(subset)))
        
        # get predictions
        preds = sorted(s['prediction'] for s in subset)
#         print('predictions: {}'.format(preds))

        # get percentiles
        percs = np.percentile(preds, [20, 50, 80])
#         print('percs {}'.format(percs))
        
        # break perc > x
        if all(pi > xi for xi, pi in zip(x, percs)):
            return pool

In [90]:
def bet_results(combs, box):
    """get results for box on combs"""

    perms = list(permutations(box, r))
    
    # profit start with loss of bet made
    outcome = {
        'success': 0,
        'profit': -1 * len(perms),
        'box_size': len(box),
    }
    
    # get winner comb
    comb_win = [c for i, c in combs.iterrows() if c['success']]
    if len(comb_win) != 1:
        return
    comb = comb_win[0]
    
    # get dividend
    if comb['run1_num'] in box and comb['run2_num'] in box:
        outcome['success'] = 1
        outcome['profit'] += comb['dividend']
    
    return outcome

In [106]:
def process_strategy(x, cutoff=1):
    book = []
    for _, group in grouped:
        # cut sample
        if random.random() > cutoff:
            continue
        box = bet_combinations(group.to_dict(orient='records'), r, x)
        if not box:
#             print('no box for race {}'.format(_))
            continue
#         print('box {}'.format(box))        
        outcome = bet_results(group, box)
        if outcome:
            book.append(outcome)
#     print('Lost {} races'.format(len(grouped) - len(book)))
    return pd.DataFrame(book)

In [113]:
def f(x):
    print('.', end='')
    cutoff = 1 / len(grouped)
    book_df = process_strategy(x, cutoff)
#     print(book_df)
    if not len(book_df):
        return 0
    profit = book_df.profit.sum()
    return profit / len(grouped) * -1
    

print('optimizing', end='')
time_start = time.time()

x, *grid = sp.optimize.brute(f, np.s_[0:1:4j, 0:1:4j, 0:1:4j], full_output=True)
print('x: {}'.format(x))
# print('grid: {}'.format(grid))

print('done: took {:.0f}s'.format((time.time() - time_start)))

optimizing..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................x: [ 0.00025 -0.       0.     ]
done: took 648s


In [114]:
print('race type: {}'.format(race_type))
print('bet type: {}'.format(bet_type))
print('x: {}'.format(x))
df = process_strategy(x)
print('${:.2f} profit per race'.format(df.profit.sum() / len(grouped)))
print('{:.0f}% of races {} / {}'.format(len(df) / len(grouped) * 100, len(df), len(grouped)))

race type: R
bet type: E
x: [ 0.00025 -0.       0.     ]
$-26.79 profit per race
59% of races 4385 / 7490


In [None]:
df['ones'] = np.ones(len(df))
df['win_rate'] = df.success.cumsum() / df.ones.cumsum()
win_title = '{:.0f}% win rate'.format(df.win_rate.iloc[-1] * 100)
df.win_rate.plot(title=win_title, xlim=(100), ylim=(0, 1))

In [None]:
# ROI
roi = df.profit.rolling(20).sum() / df.bet_chunk.rolling(20).sum()
roi.plot(xlim=(100), ylim=(-1, 1), alpha=0.3)
roi = df.profit.cumsum() / df.bet_chunk.cumsum()
roi_title = '{:.1f}% ROI'.format(roi.iloc[-1] * 100)
roi.plot(xlim=(100), ylim=(-1, 1), title=roi_title)

In [None]:
# profit per num bets 
df.num_bets.plot(kind='hist', title='# bets')
groups = df.groupby('num_bets').mean()
groups.plot(kind='bar', y='success', title='Win rate per #bets')
groups.plot(kind='bar', y='profit', title='Profit per #bets')

In [None]:
# profit per num runners
df.num_runners.hist()
groups = df.groupby('num_runners').mean()
groups.plot(kind='bar', y='success', title='Win rate per #runners')
groups.plot(kind='bar', y='profit', title='Profit per #runners')

In [None]:
# max diff
df.win_diff.plot(kind='hist', bins=200, alpha=0.6, ylim=(0, 100), xlim=(0, 0.3))
df.max_diff.plot(kind='hist', bins=200, alpha=0.2, ylim=(0, 100), xlim=(0, 0.3))

In [None]:
# df.profit.plot(kind='hist', bins=50, xlim=(-5), ylim=(0, 140))
df.profit.rolling(100).sum().plot(alpha=0.4)
df.profit.cumsum().plot(alpha=0.8)

In [None]:
# bets/runners
df['bets_vs_runners'] = df.num_bets / df.num_runners
df['bets_vs_runners'].hist(bins=10)
groups = df.groupby(df['bets_vs_runners'].apply(lambda x: round(x, 3))).sum()
groups.plot(kind='line', y='profit', title='mean profit/bets_per_runners')

In [None]:
# # pred vs scale
# df['prob_vs_scale'] = df.pr / df.num_runners
# df['bets_vs_runners'].hist(bins=10)
# groups = df.groupby(df['bets_vs_runners'].apply(lambda x: round(x, 3))).sum()
# groups.plot(kind='line', y='profit', title='mean profit/bets_per_runners')

In [None]:
df.prob2scale.plot(kind='hist', bins=500, title='min prob2scale', xlim=(1, 1.5))
groups = df.groupby(df['prob2scale'].apply(lambda x: round(x, 1))).mean()
groups.plot(kind='bar', y='profit', title='profit / prob2scale')
groups.plot(kind='bar', y='win_rate', title='winrate / prob2scale')

groups_sum = df.groupby(df['prob2scale'].apply(lambda x: round(x, 1))).sum()
groups_sum.plot(kind='bar', y='profit', title='sum profit / prob2scale')

In [None]:
df.prob0scale.plot(kind='hist', bins=50, title='min prob0scale')
groups = df.groupby(df['prob0scale'].apply(lambda x: round(x, 2))).mean()
groups.plot(kind='bar', y='profit', title='profit / prob0scale')
groups.plot(kind='bar', y='win_rate', title='winrate / prob0scale')

groups_sum = df.groupby(df['prob0scale'].apply(lambda x: round(x, 2))).sum()
groups_sum.plot(kind='bar', y='profit', title='total profit')