In [1]:
from bs4 import BeautifulSoup
from ipynb.fs.full.racing_tv_functions import *

import datetime
import json
import numpy as np
import os
import pandas as pd
import re
import requests
import time

In [2]:
# Refresh the stored race data; the returned status message is shown as the cell output.
# NOTE(review): `delay` is presumably a pause between requests - confirm in racing_tv_functions.
update_race_data(delay=0.1)

'Date is already up to date.'

In [3]:
# Collect today's race card (the helper logs one line per race it collects).
# NOTE(review): `delay` is presumably a pause between requests - confirm in racing_tv_functions.
today_races = todays_data_df(delay=0.1)

Collected race data for Ripon at 13:30
Collected race data for Ripon at 14:05
Collected race data for Ripon at 14:40
Collected race data for Ripon at 15:15
Collected race data for Ripon at 15:50
Collected race data for Ripon at 16:23
Collected race data for Ripon at 16:58
Collected race data for Ripon at 17:33
Collected race data for Epsom at 13:35
Collected race data for Epsom at 14:10
Collected race data for Epsom at 14:45
Collected race data for Epsom at 15:20
Collected race data for Epsom at 15:55
Collected race data for Epsom at 16:28
Collected race data for Epsom at 17:03
Collected race data for Cartmel at 14:20
Collected race data for Cartmel at 14:55
Collected race data for Cartmel at 15:30
Collected race data for Cartmel at 16:05
Collected race data for Cartmel at 16:43
Collected race data for Cartmel at 17:18
Collected race data for Chepstow at 13:53
Collected race data for Chepstow at 14:28
Collected race data for Chepstow at 15:03
Collected race data for Chepstow at 15:38
C

In [4]:
# Load the historical per-horse race records from the local JSON file into a DataFrame.
horse_df = unpack_json_to_df('historical_horse_data.json')

In [5]:
# Turn the 'N/A' placeholder into NaN *before* dropping rows, so that a
# placeholder in 'Position' is discarded together with genuinely missing
# positions. (Dropping first would let 'N/A' positions survive as NaN and
# skew the per-horse race counts computed later.)
cleaned_df = horse_df.replace('N/A', np.nan).dropna(subset=['Position'])

# The speed columns must be numeric for the comparisons and aggregations below.
speed_columns = ['Finishing Speed (%)', 'Top Speed Value (mph)']
cleaned_df[speed_columns] = cleaned_df[speed_columns].astype(float)

In [None]:
# Map every race id to the result of finish_condition() for that race.
# NOTE(review): downstream cells read element [0] as a mean and [1] as a
# standard deviation - confirm against finish_condition's definition.
id_list = cleaned_df['Race Id'].unique().tolist()
conditions = {
    race_id: finish_condition(cleaned_df, race_id)
    for race_id in id_list
}

In [None]:
# Per-race "fast finish" thresholds at three strictness levels, each of the
# form: first element + k * second element of the race's condition entry.
high_condition_dict, med_condition_dict, low_condition_dict = {}, {}, {}

for race_id, condition_list in conditions.items():
    centre = condition_list[0]
    spread = condition_list[1]
    high_condition_dict[race_id] = centre + spread        # k = 1
    med_condition_dict[race_id] = centre + 0.8 * spread   # k = 0.8
    low_condition_dict[race_id] = centre + 0.5 * spread   # k = 0.5

In [None]:
# Row-aligned finishing speeds and race ids; the Med/Low cells reuse both lists.
speed_list = cleaned_df['Finishing Speed (%)'].tolist()
id_list = cleaned_df['Race Id'].tolist()

# True where the runner met its race's high threshold, NaN otherwise.
# NaN (rather than False) lets a later 'count' aggregation tally only the hits.
high_fast_list = [
    True if speed >= high_condition_dict[race_id] else np.nan
    for speed, race_id in zip(speed_list, id_list)
]

In [None]:
# True where the runner met its race's medium threshold, NaN otherwise
# (NaN so that a later 'count' aggregation tallies only the hits).
med_fast_list = [
    True if speed >= med_condition_dict[race_id] else np.nan
    for speed, race_id in zip(speed_list, id_list)
]

In [None]:
# True where the runner met its race's low threshold, NaN otherwise
# (NaN so that a later 'count' aggregation tallies only the hits).
low_fast_list = [
    True if speed >= low_condition_dict[race_id] else np.nan
    for speed, race_id in zip(speed_list, id_list)
]

In [None]:
# Flag columns to place at the front of cleaned_df, in their final left-to-right order.
fast_finish_flags = {
    'High Fast Finish': high_fast_list,
    'Med Fast Finish': med_fast_list,
    'Low Fast Finish': low_fast_list,
}

# DataFrame.insert raises ValueError if the column already exists, so remove any
# flag columns left over from a previous run of this cell to keep it re-runnable.
cleaned_df = cleaned_df.drop(columns=list(fast_finish_flags), errors='ignore')

# Each insert goes to position 0, so insert in reverse to end up with High, Med, Low.
for flag_column in reversed(list(fast_finish_flags)):
    cleaned_df.insert(0, flag_column, fast_finish_flags[flag_column])

In [None]:
# Per-horse summary: best top speed, finishing-position statistics and the number
# of fast finishes at each threshold. The flag columns hold True or NaN, so
# 'count' (which skips NaN) gives the number of flagged races.
horse_key = cleaned_df['Horse']
aggregations = {
    # 'Best Finish (%)' : ('Finishing Speed (%)', 'max'),
    # 'Mean Finish (%)' : ('Finishing Speed (%)', 'mean'),
    # 'Worst Finish (%)' : ('Finishing Speed (%)', 'min'),
    # 'Std Finish (%)' : ('Finishing Speed (%)', 'std'),
    # 'Median Finish (%)' : ('Finishing Speed (%)', 'median'),
    'Best Speed (mph)': ('Top Speed Value (mph)', 'max'),
    'Best Pos': ('Position', 'min'),
    'Mean Pos': ('Position', 'mean'),
    'Worst Pos': ('Position', 'max'),
    'Std Pos': ('Position', 'std'),
    'High Fast Finishes': ('High Fast Finish', 'count'),
    'Med Fast Finishes': ('Med Fast Finish', 'count'),
    'Low Fast Finishes': ('Low Fast Finish', 'count'),
    # Non-null Position count; rows without a Position were dropped when cleaned_df was built.
    'Num Races': ('Position', 'count'),
}
grouped_df = cleaned_df.groupby([horse_key]).agg(**aggregations)

# Share of each horse's races that were fast finishes at each threshold.
# NOTE(review): these are fractions (count / count), not multiplied by 100,
# despite the '(%)' in the column names.
num_races = grouped_df['Num Races']
for ratio_column, count_column in [
    ('High Fast Races (%)', 'High Fast Finishes'),
    ('Med Fast Races (%)', 'Med Fast Finishes'),
    ('Low Fast Races (%)', 'Low Fast Finishes'),
]:
    grouped_df[ratio_column] = grouped_df[count_column] / num_races

In [None]:
# Put the ratio columns first, followed by speed, position statistics and raw counts.
summary_columns = [
    'High Fast Races (%)', 'Med Fast Races (%)', 'Low Fast Races (%)',
    'Best Speed (mph)',
    'Best Pos', 'Mean Pos', 'Worst Pos', 'Std Pos',
    'High Fast Finishes', 'Med Fast Finishes', 'Low Fast Finishes',
    'Num Races',
]
grouped_df = grouped_df[summary_columns]

In [None]:
# Attach each runner's historical summary to today's card, then index the result
# by off time with the columns in report order.
# NOTE(review): the join is index-on-index; presumably today_races is indexed by
# horse name, since reset_index() is what yields the 'Horse' column - confirm.
report_columns = [
    'Track', 'Horse', 'High Fast Races (%)', 'Med Fast Races (%)', 'Low Fast Races (%)',
    'Best Speed (mph)', 'Best Pos', 'Mean Pos', 'Worst Pos', 'Std Pos',
    # 'High Fast Finishes', 'Med Fast Finishes', 'Low Fast Finishes',
    'Num Races', 'Tips',
]

card_with_history = today_races[['Off Time', 'Track', 'Tips']].join(grouped_df)
today_races = card_with_history.reset_index().set_index('Off Time')[report_columns]

In [None]:
# Write today's table (rounded to 2 decimals) to a CSV named after today's date (DD-MM-YYYY).
todays_date = pd.Timestamp.today().strftime('%d-%m-%Y')

# to_csv does not create missing directories, so make sure the output folder exists
# (e.g. on a fresh checkout) instead of failing at the very last step.
output_dir = './Horse Data'
os.makedirs(output_dir, exist_ok=True)

today_races.round(2).to_csv('{:s}/horse_data_{:s}.csv'.format(output_dir, todays_date))

In [None]:
# Display the final table for today's runners as the cell output.
today_races