# Win vs EW Probs

In [1]:
import pymysql
import sqlalchemy
import pandas as pd
import numpy as np
import sys
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# params
horses = 8
places = 3
ewodds = 1/5

In [20]:
np.random.seed(123)

time_means = np.random.normal(100, 10, size=horses)
time_sds = np.random.gamma(10, 1, size=horses)

In [21]:
time_means

array([ 89.14369397, 109.97345447, 102.82978498,  84.93705286,
        94.21399748, 116.51436537,  75.73320757,  95.71087371])

In [22]:
time_sds

array([14.16098824,  7.21451773, 15.08451563,  7.81319818, 18.27509583,
       18.184243  , 12.14525207, 15.08176515])

In [23]:
races = 1000
horse_times = []
for i in range(horses):
    m = time_means[i]
    s = time_sds[i]
    horse_times.append(list(np.random.normal(m, s, size=races)))

race_results = pd.DataFrame(horse_times).T

In [24]:
race_results['horse_order'] = race_results.iloc[:, :horses].apply(lambda x: list(np.argsort(x)), axis=1)
race_results['first'] = race_results['horse_order'].apply(lambda x: x[0])
race_results['second'] = race_results['horse_order'].apply(lambda x: x[1])
race_results['third'] = race_results['horse_order'].apply(lambda x: x[2])

In [25]:
race_results.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,horse_order,first,second,third
0,71.387505,110.869696,111.730326,77.277632,115.985405,145.452797,60.335772,114.803013,"[6, 0, 3, 1, 2, 7, 4, 5]",6,0,3
1,80.112502,118.503407,114.195643,84.387157,105.616344,116.832333,47.416608,86.733959,"[6, 0, 3, 7, 4, 2, 5, 1]",6,0,3
2,101.9892,116.775059,109.224769,83.360731,107.201764,105.378822,70.591698,112.865056,"[6, 3, 0, 5, 4, 2, 7, 1]",6,3,0
3,68.912163,111.438323,94.604871,78.500747,97.429474,94.188537,57.574732,94.846345,"[6, 0, 3, 5, 2, 7, 4, 1]",6,0,3
4,87.160182,104.667921,105.953947,87.134873,112.255157,115.761055,52.07354,114.905381,"[6, 3, 0, 1, 2, 4, 7, 5]",6,3,0


In [26]:
win_odds = race_results.groupby('first').size().reset_index().rename(
    columns={'first': 'horse', 0: 'wins'})
second_counts = race_results.groupby('second').size().reset_index().rename(
    columns={'second': 'horse', 0: 'seconds'})
third_counts = race_results.groupby('third').size().reset_index().rename(columns={
    'third': 'horse', 0: 'thirds'})
win_odds = win_odds.merge(second_counts, how='left', on='horse').merge(third_counts, how='left', on='horse')
win_odds['places'] = win_odds[['wins', 'seconds', 'thirds']].sum(axis=1)

win_odds['win_prob'] = win_odds['wins']/races
win_odds['place_prob'] = win_odds['places']/races
win_odds['win_odds'] = 1/win_odds['win_prob']
win_odds['place_odds'] = 1/win_odds['place_prob']
win_odds

Unnamed: 0,horse,wins,seconds,thirds,places,win_prob,place_prob,win_odds,place_odds
0,0,142,178,212,532,0.142,0.532,7.042254,1.879699
1,2,24,68,71,163,0.024,0.163,41.666667,6.134969
2,3,115,273,282,670,0.115,0.67,8.695652,1.492537
3,4,127,122,102,351,0.127,0.351,7.874016,2.849003
4,5,12,20,32,64,0.012,0.064,83.333333,15.625
5,6,502,223,158,883,0.502,0.883,1.992032,1.132503
6,7,78,115,134,327,0.078,0.327,12.820513,3.058104


In [27]:
win_odds['place_bookie_odds'] = 1+(win_odds['win_odds']-1)*ewodds

In [28]:
win_odds

Unnamed: 0,horse,wins,seconds,thirds,places,win_prob,place_prob,win_odds,place_odds,place_bookie_odds
0,0,142,178,212,532,0.142,0.532,7.042254,1.879699,2.208451
1,2,24,68,71,163,0.024,0.163,41.666667,6.134969,9.133333
2,3,115,273,282,670,0.115,0.67,8.695652,1.492537,2.53913
3,4,127,122,102,351,0.127,0.351,7.874016,2.849003,2.374803
4,5,12,20,32,64,0.012,0.064,83.333333,15.625,17.466667
5,6,502,223,158,883,0.502,0.883,1.992032,1.132503,1.198406
6,7,78,115,134,327,0.078,0.327,12.820513,3.058104,3.364103
