In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf

from analysis import *

In [3]:
# Load the horse-level table and parse its `date` column into datetimes.
# `horsedtype` is not defined in this notebook; presumably it is supplied by
# the star import from `analysis` -- confirm there.
horses = pd.read_csv('./../data/horses.csv', dtype=horsedtype).assign(
    date=lambda df: pd.to_datetime(df['date']),
)

# Load the race-level table; both `date` and `startTime` become datetimes.
# `racedtype` presumably comes from `analysis` as well.
races = pd.read_csv('./../data/races.csv', dtype=racedtype).assign(
    date=lambda df: pd.to_datetime(df['date']),
    startTime=lambda df: pd.to_datetime(df['startTime']),
)

In [4]:
# Derive each horse's final place and final lengths-back from the per-segment
# columns: walk segments 6 -> 1 and keep the first non-null value found, i.e.
# the value from the highest-numbered segment that has data.
for finCol, segPrefix in (('placeFin', 'placeSeg'), ('lengthsBackFin', 'rlLengthsSeg')):
    horses[finCol] = np.nan
    for seg in range(6, 0, -1):
        stillMissing = horses[finCol].isnull()
        horses.loc[stillMissing, finCol] = horses.loc[stillMissing, segPrefix + str(seg)]

# A horse that finished first is recorded as zero lengths back.
isWinner = horses['placeFin'] == 1
horses.loc[isWinner, 'lengthsBackFin'] = 0

# Rank horses by odds within each race. The rows are ordered by race and then
# by ascending odds before the helpers run, and the original row order is
# restored afterwards with sort_index().
# NOTE(review): flagNew / iterateFlagNew are not defined in this notebook;
# presumably flagNew marks the first row of each (date, track, race) group and
# iterateFlagNew numbers the rows within each group -- confirm in `analysis`.
horses = horses.sort_values(by=['date', 'track', 'race', 'odds'])
horses = flagNew(horses, flagColName='newRace', cols=['date', 'track', 'race'])
horses = iterateFlagNew(horses, flagColName='newRace', iterateColName='oddsRank')
horses = horses.sort_index()

# Convert odds into a probability as 1 / (odds + 1).
horses['probability'] = 1 / (horses['odds'] + 1)

# Per-race mean of lengthsBackFin, joined back onto every horse row.
# NOTE(review): this merge adds `avgLengthsBack` each time the cell runs, so
# re-running the cell without reloading `horses` would produce suffixed
# duplicate columns; run it once per fresh load.
raceKeys = ['track', 'date', 'race']
avgLengthsBack = (
    horses[raceKeys + ['lengthsBackFin']]
    .groupby(raceKeys)
    .mean()
    .rename(columns={'lengthsBackFin': 'avgLengthsBack'})
)
horses = pd.merge(horses, avgLengthsBack, on=raceKeys)

# Positive values mean the horse finished closer to the winner than the race average.
horses['diffFromAvg'] = horses['avgLengthsBack'] - horses['lengthsBackFin']

In [5]:
# One row per horse with its race-level columns attached. The inner join keeps
# only (track, date, race) keys that appear in both tables.
combined = races.merge(horses, how='inner', on=['track', 'date', 'race'])

In [6]:
# Strategy: bet on the favorite (oddsRank 1) to win in every race.
# Winning tickets are the rows where that horse actually finished first.
favoriteWon = (combined['oddsRank'] == 1) & (combined['placeFin'] == 1)
subset = combined.loc[favoriteWon]
winnings = subset['firstPlaceWin'].sum()

# Cost: one $2 ticket per distinct race that has a win payout recorded.
# (Assumes the payout column is quoted for a $2 bet.)
racesBet = combined.loc[combined['firstPlaceWin'].notnull(), ['track', 'date', 'race']].drop_duplicates()
spent = racesBet.shape[0] * 2
print('For bet favorite to win: spent:', spent, '; earned:', winnings, '; net income:', winnings - spent)

For bet favorite to win: spent: 90710 ; earned: 79658.40000000001 ; net income: -11051.599999999991


In [7]:
# Strategy: bet on the second favorite (oddsRank 2) to place in every race.
# Winning tickets are the rows where that horse finished first or second.
# NOTE(review): `secondPlacePlace` is summed even when the horse finished
# first. Judging by its name, that column may be the place payout of the
# runner-up rather than of the horse that was bet on -- confirm against the
# races schema before trusting the net income figure.
secondFavPlaced = (combined['oddsRank'] == 2) & (combined['placeFin'] <= 2)
subset = combined.loc[secondFavPlaced]
winnings = subset['secondPlacePlace'].sum()

# Cost: one $2 ticket per distinct race that has this payout recorded.
# (Assumes the payout column is quoted for a $2 bet.)
racesBet = combined.loc[combined['secondPlacePlace'].notnull(), ['track', 'date', 'race']].drop_duplicates()
spent = racesBet.shape[0] * 2
print('For bet second favorite to place: spent:', spent, '; earned:', winnings, '; net income:', winnings - spent)

For bet second favorite to place: spent: 90144 ; earned: 98589.7 ; net income: 8445.699999999997


In [8]:
# Strategy: bet on the third favorite (oddsRank 3) to show in every race.
# Winning tickets are the rows where that horse finished in the top three.
# NOTE(review): `thirdPlaceShow` is summed even when the horse finished first
# or second. Judging by its name, that column may be the show payout of the
# third-placed horse only -- confirm against the races schema.
thirdFavShowed = (combined['oddsRank'] == 3) & (combined['placeFin'] <= 3)
subset = combined.loc[thirdFavShowed]
winnings = subset['thirdPlaceShow'].sum()

# Cost: one $2 ticket per distinct race that has this payout recorded.
# (Assumes the payout column is quoted for a $2 bet.)
racesBet = combined.loc[combined['thirdPlaceShow'].notnull(), ['track', 'date', 'race']].drop_duplicates()
spent = racesBet.shape[0] * 2
print('For bet third favorite to show: spent:', spent, '; earned:', winnings, '; net income:', winnings - spent)

For bet third favorite to show: spent: 84776 ; earned: 84074.6 ; net income: -701.3999999999942


In [9]:
# Strategy: a $2 exacta on the two favorites in odds order (favorite first,
# second favorite second) in every race.
raceKeys = ['track', 'date', 'race']

# Keep the rows for each leg that came in: favorite won / second favorite ran second.
favoriteWon = (combined['oddsRank'] == 1) & (combined['placeFin'] == 1)
secondFavSecond = (combined['oddsRank'] == 2) & (combined['placeFin'] == 2)
exactaSet = combined.loc[favoriteWon | secondFavSecond]

# The ticket cashes only when both legs hit, i.e. the race appears more than
# once among the surviving rows. Then reduce to one row per race (the
# favorite's) and drop races with no exacta payout recorded.
exactaSet = exactaSet.loc[exactaSet.duplicated(subset=raceKeys, keep=False)]
exactaSet = exactaSet.loc[exactaSet['oddsRank'] == 1]
exactaSet = exactaSet.loc[exactaSet['exactaPayout'].notnull()]

# Payouts are quoted for the buy-in in `exactaBuyin` (text containing a
# '$' followed by the amount); rescale each payout to a $2 ticket.
# The pattern is a raw string so that `\$` reaches the regex engine without
# triggering Python's invalid-escape-sequence warning.
exactaMultiplier = 2 / pd.to_numeric(exactaSet['exactaBuyin'].str.extract(r'\$(.*)')[0])
winnings = (exactaMultiplier * exactaSet['exactaPayout']).sum()

# Cost: one $2 ticket per distinct race that has an exacta payout recorded.
racesBet = combined.loc[combined['exactaPayout'].notnull(), raceKeys].drop_duplicates()
spent = racesBet.shape[0] * 2
print('For bet exacta on two favorites: spent:', spent, '; earned:', winnings, '; net income:', winnings - spent)

For bet exacta on two favorites: spent: 88068 ; earned: 75376.0 ; net income: -12692.0


In [10]:
# Strategy: a $2 trifecta on the three favorites in odds order (favorite
# first, second favorite second, third favorite third) in every race.
raceKeys = ['track', 'date', 'race']

# Keep the rows for each leg that came in.
favoriteWon = (combined['oddsRank'] == 1) & (combined['placeFin'] == 1)
secondFavSecond = (combined['oddsRank'] == 2) & (combined['placeFin'] == 2)
thirdFavThird = (combined['oddsRank'] == 3) & (combined['placeFin'] == 3)
trifectaSet = combined.loc[favoriteWon | secondFavSecond | thirdFavThird]

# The ticket cashes only when all three legs hit, i.e. the race has exactly
# three surviving rows. Counting hits per race states that directly instead of
# peeling off first occurrences twice. Keep one row per race (the favorite's);
# the race-level payout columns are the same on every row of a race.
hitsPerRace = trifectaSet.groupby(raceKeys)['oddsRank'].transform('size')
trifectaSet = trifectaSet.loc[(hitsPerRace == 3) & (trifectaSet['oddsRank'] == 1)]
trifectaSet = trifectaSet.loc[trifectaSet['trifectaPayout'].notnull()]

# Payouts are quoted for the buy-in in `trifectaBuyin` (text containing a
# '$' followed by the amount); rescale each payout to a $2 ticket.
# The pattern is a raw string so that `\$` reaches the regex engine without
# triggering Python's invalid-escape-sequence warning.
trifectaMultiplier = 2 / pd.to_numeric(trifectaSet['trifectaBuyin'].str.extract(r'\$(.*)')[0])
winnings = (trifectaMultiplier * trifectaSet['trifectaPayout']).sum()

# Cost: one $2 ticket per distinct race that has a trifecta payout recorded.
racesBet = combined.loc[combined['trifectaPayout'].notnull(), raceKeys].drop_duplicates()
spent = racesBet.shape[0] * 2
# The label says "three favorites": this bet covers the top three by odds.
print('For bet trifecta on three favorites: spent:', spent, '; earned:', winnings, '; net income:', winnings - spent)

For bet trifecta on two favorites: spent: 86802 ; earned: 70191.0 ; net income: -16611.0
