In [None]:
%pip install numpy
%pip install Pandas

In [2]:
import numpy as np
import pandas as pd

In [3]:
import glob
from datetime import timedelta
# Collect every results export that sits in the working directory.
# glob returns matches in arbitrary order, so sort to make the row order of
# the concatenated frame reproducible from run to run.
results_files = sorted(glob.glob('Results_*.csv'))
if not results_files:
    # pd.concat([]) would otherwise fail with an opaque "No objects to concatenate".
    raise FileNotFoundError("No files matching 'Results_*.csv' were found in the working directory")

history = pd.concat([pd.read_csv(f) for f in results_files])
# Keep only rows whose ResultStatus is 'CompletedRace'. The .copy() detaches the
# filtered rows so the column assignment below writes to an independent frame
# rather than to a slice of the concatenated one (avoids SettingWithCopyWarning).
history = history[history['ResultStatus'] == 'CompletedRace'].copy()
history['Off'] = pd.to_datetime(history['Off'], format='%m/%d/%Y %H:%M:%S')

# Date bounds used by the back-test loop: the first off-date, and the day
# before the last off-date.
history_start = history['Off'].min().date()
history_end = history['Off'].max().date() - timedelta(days=1)
# Length, in days, of the sliding history window handed to each predictor.
predict_window_size = 90

In [37]:
from typing import Callable

def predict_races_using(prediction_func: Callable[[pd.DataFrame, pd.DataFrame], pd.DataFrame], max_days: int = -1) -> pd.DataFrame:
    """Back-test a prediction function over a sliding window of race history.

    On every step the rows of the module-level ``history`` frame that fall in a
    ``predict_window_size``-day window are handed to ``prediction_func`` as prior
    results, together with the rows of one later day as race cards. The window
    then advances by one day.

    Args:
        prediction_func: Called as ``prediction_func(race_cards, window)`` with
            copies of the relevant ``history`` rows. The frame it returns must
            contain ``PredictedPosition``, ``FinishingPosition`` and
            ``DecimalOdds`` columns.
        max_days: Stop once this many prediction days have been processed;
            ``-1`` (the default) runs until the history range is exhausted.

    Returns:
        A frame with one row per prediction day and the columns ``Date``,
        ``Races``, ``Predicted``, ``Wins``, ``Losses``, ``Gains`` and
        ``Winnings``. ``Gains`` is ``(winnings - losses) / predicted * 100``
        and is NaN for a day on which nothing was predicted.
    """
    # Same column order the row-by-row concat used to produce; declaring
    # 'Winnings' up front keeps the schema identical when no day is processed.
    summary_columns = ['Date', 'Races', 'Predicted', 'Wins', 'Losses', 'Gains', 'Winnings']
    window_length = timedelta(days=predict_window_size)
    one_day = timedelta(days=1)

    # Loop-invariant: extract the calendar date of every row once instead of
    # four times per iteration.
    off_dates = history['Off'].dt.date

    window_start = history_start
    loop_end = history_end - window_length
    daily_predictions_made = 1
    summary_rows = []
    while window_start < loop_end:
        window_end = window_start + window_length
        window = history[(off_dates >= window_start) & (off_dates < window_end)].copy()
        # NOTE(review): the window excludes window_end and the prediction day is
        # window_end + 1, so the day equal to window_end is neither history nor
        # predicted - confirm this one-day gap is intended.
        prediction_start = window_end + one_day
        prediction_end = prediction_start + one_day
        race_cards = history[(off_dates >= prediction_start) & (off_dates < prediction_end)].copy()
        race_count = race_cards["RaceId"].nunique()

        predictions = prediction_func(race_cards, window)

        predicted = len(predictions)
        winners = predictions[predictions['PredictedPosition'] == predictions['FinishingPosition']]
        wins = len(winners)
        winnings = winners['DecimalOdds'].sum()
        losses = predicted - wins
        if predicted:
            percentage_gains = ((winnings - losses) / predicted) * 100.0
        else:
            # Nothing was predicted for this day: report NaN rather than divide by zero.
            percentage_gains = float('nan')
        print(f'Scored: {predicted}, Won: {wins}, Winnings (with £1 stake): {winnings}, Lost: {losses}, %gains/loss: {percentage_gains}')

        summary_rows.append({
            'Date': prediction_start,
            'Races': race_count,
            'Predicted': predicted,
            'Wins': wins,
            'Winnings': winnings,
            'Losses': losses,
            'Gains': percentage_gains,
        })

        window_start = window_start + one_day
        daily_predictions_made = daily_predictions_made + 1
        if max_days != -1 and daily_predictions_made > max_days:
            break

    # Build the summary once; growing a frame with concat inside the loop is
    # quadratic and leaves every column with object dtype.
    return pd.DataFrame(summary_rows, columns=summary_columns)

In [5]:
def calculate_distance_type(row):
    """Map a row's ``DistanceInMeters`` value to a named distance band.

    Each upper bound is exclusive. A value that is not below any bound
    (4000 and above, or one for which every ``<`` comparison is false,
    such as NaN) is labelled 'VeryLong'.
    """
    distance = row['DistanceInMeters']
    bands = (
        (1300, 'VeryShort'),
        (1700, 'Short'),
        (3000, 'Medium'),
        (4000, 'Long'),
    )
    for upper_bound, label in bands:
        if distance < upper_bound:
            return label
    return 'VeryLong'

In [6]:
def calculate_average_speed_over_previous_races(x):
    """Summarise a group of prior race rows into totals and an average speed.

    NOTE(review): a later cell defines a function with this same name, and
    that later definition is the one in effect after Run All; one of the two
    should be removed.

    Args:
        x: Frame of race rows with ``HorseId``, ``DistanceInMeters`` and
            ``RaceTimeInSeconds`` columns.

    Returns:
        A Series indexed by ``RacesRan`` (count of non-null ``HorseId``),
        ``TotalDistanceInMeters``, ``TotalTimeInSeconds`` and ``AverageSpeed``
        (total distance divided by total time). ``AverageSpeed`` is NaN when
        the total time is not positive.
    """
    races_ran = x['HorseId'].count()
    total_distance = x['DistanceInMeters'].sum()
    total_time = x['RaceTimeInSeconds'].sum()
    # Without a positive total time the quotient would be inf (or raise), and
    # an inf speed still satisfies an "AverageSpeed > 0" check; NaN marks the
    # value as unknown instead.
    average_speed = total_distance / total_time if total_time > 0 else float('nan')
    return pd.Series(
        {
            'RacesRan': races_ran,
            'TotalDistanceInMeters': total_distance,
            'TotalTimeInSeconds': total_time,
            'AverageSpeed': average_speed,
        },
        index=['RacesRan', 'TotalDistanceInMeters', 'TotalTimeInSeconds', 'AverageSpeed'],
    )

In [7]:
def calculate_average_speed_over_previous_races(x):
    """Summarise a group of prior race rows into totals and an average speed.

    NOTE(review): an earlier cell defines a function with this same name;
    this later definition replaces it when the notebook is run top to bottom.
    One of the two should be removed.

    Args:
        x: Frame of race rows with ``HorseId``, ``DistanceInMeters`` and
            ``RaceTimeInSeconds`` columns.

    Returns:
        A Series indexed by ``RacesRan`` (count of non-null ``HorseId``),
        ``TotalDistanceInMeters``, ``TotalTimeInSeconds`` and ``AverageSpeed``
        (total distance divided by total time). ``AverageSpeed`` is NaN when
        the total time is not positive.
    """
    races_ran = x['HorseId'].count()
    total_distance = x['DistanceInMeters'].sum()
    total_time = x['RaceTimeInSeconds'].sum()
    # Without a positive total time the quotient would be inf (or raise), and
    # an inf speed still satisfies an "AverageSpeed > 0" check; NaN marks the
    # value as unknown instead.
    average_speed = total_distance / total_time if total_time > 0 else float('nan')
    return pd.Series(
        {
            'RacesRan': races_ran,
            'TotalDistanceInMeters': total_distance,
            'TotalTimeInSeconds': total_time,
            'AverageSpeed': average_speed,
        },
        index=['RacesRan', 'TotalDistanceInMeters', 'TotalTimeInSeconds', 'AverageSpeed'],
    )

In [8]:
def calculate_speed_race_aggregates(x):
    """Count the runners in a group and how many of them have a positive average speed.

    Returns a Series with ``HorseCount`` (non-null ``HorseId`` values) and
    ``PreviouslyRanOnSimilarCourseCount`` (non-null ``HorseId`` values among
    the rows whose ``AverageSpeed`` is greater than zero).
    """
    has_prior_speed = x['AverageSpeed'] > 0
    counts = {
        'HorseCount': x['HorseId'].count(),
        'PreviouslyRanOnSimilarCourseCount': x.loc[has_prior_speed, 'HorseId'].count(),
    }
    return pd.Series(counts, index=list(counts))

In [17]:
def average_speed_predictor(race_cards : pd.DataFrame, prior_race_results: pd.DataFrame) -> pd.DataFrame:
    """Work-in-progress predictor based on each horse's average speed in prior races.

    Both input frames are modified in place: a ``DistanceType`` column is added
    to each. Prior results are then aggregated per
    (HorseId, RaceType, DistanceType, Going) and left-joined onto the race
    cards, with a missing ``AverageSpeed`` filled with 0.

    Args:
        race_cards: Rows for the races to predict.
        prior_race_results: Historical rows used to derive average speeds.

    Returns:
        TODO: nothing yet. The function is unfinished and implicitly returns
        ``None`` despite the annotation, so it cannot be used where a
        predictions frame is expected until the prediction step is written.
    """
    speed_keys = ['HorseId', 'RaceType', 'DistanceType', 'Going']

    print('Calculating distance types...')
    prior_race_results['DistanceType'] = prior_race_results.apply(calculate_distance_type, axis=1)
    print(f'Calculated distance types for {len(prior_race_results.index)} history rows...')
    race_cards['DistanceType'] = race_cards.apply(calculate_distance_type, axis=1)
    # Report the race-card row count here; this message previously repeated
    # the history row count.
    print(f'Calculated distance types for {len(race_cards.index)} race cards rows...')
    print('Calculating average speeds...')
    average_speeds = prior_race_results.groupby(speed_keys).apply(calculate_average_speed_over_previous_races)
    print(f'Calculated average speeds for {len(average_speeds.index)} horses...')
    # Horses with no matching prior group get NaN from the left join; treat
    # that as a speed of 0.
    results_with_speeds = pd.merge(race_cards, average_speeds, how='left', on=speed_keys)
    results_with_speeds['AverageSpeed'] = results_with_speeds['AverageSpeed'].fillna(0)

    races_with_speed_counts = results_with_speeds.groupby('RaceId').apply(calculate_speed_race_aggregates)
    # TODO: turn results_with_speeds / races_with_speed_counts into a frame of
    # predictions (with a 'PredictedPosition' column) and return it.


In [22]:
def first_runner_predictor(race_cards : pd.DataFrame, prior_race_results: pd.DataFrame) -> pd.DataFrame:
    """Baseline predictor: pick the runner carrying race-card number 1 in every race.

    ``prior_race_results`` is accepted to satisfy the predictor signature but
    is not used. The returned frame is a copy of the selected race-card rows
    with ``PredictedPosition`` set to 1; ``race_cards`` itself is left untouched.
    """
    is_first_on_card = race_cards['RaceCardNumber'] == 1
    return race_cards.loc[is_first_on_card].assign(PredictedPosition=1)

In [38]:
# Back-test the baseline first_runner_predictor, stopping after 10 prediction days.
predict_races_using(first_runner_predictor, max_days=10)

Scored: 43, Won: 9, Winnings (with £1 stake): 34.88205128205128, Lost: 34, %gains/loss: 2.051282051282044
Scored: 28, Won: 2, Winnings (with £1 stake): 5.7, Lost: 26, %gains/loss: -72.5
Scored: 50, Won: 7, Winnings (with £1 stake): 37.46666666666667, Lost: 43, %gains/loss: -11.066666666666663
Scored: 46, Won: 5, Winnings (with £1 stake): 20.819444444444443, Lost: 41, %gains/loss: -43.87077294685991
Scored: 60, Won: 16, Winnings (with £1 stake): 91.76060606060607, Lost: 44, %gains/loss: 79.6010101010101
Scored: 104, Won: 22, Winnings (with £1 stake): 99.72575757575757, Lost: 82, %gains/loss: 17.04399766899766
Scored: 64, Won: 13, Winnings (with £1 stake): 48.45000000000001, Lost: 51, %gains/loss: -3.9843749999999845
Scored: 37, Won: 6, Winnings (with £1 stake): 31.333333333333336, Lost: 31, %gains/loss: 0.9009009009009074
Scored: 48, Won: 8, Winnings (with £1 stake): 27.144444444444446, Lost: 40, %gains/loss: -26.782407407407405
Scored: 50, Won: 12, Winnings (with £1 stake): 81.94871794

Unnamed: 0,Date,Races,Predicted,Wins,Losses,Gains,Winnings
0,2022-06-06,52,43,9,34,2.051282,34.882051
1,2022-06-07,30,28,2,26,-72.5,5.7
2,2022-06-08,58,50,7,43,-11.066667,37.466667
3,2022-06-09,51,46,5,41,-43.870773,20.819444
4,2022-06-10,60,60,16,44,79.60101,91.760606
5,2022-06-11,90,104,22,82,17.043998,99.725758
6,2022-06-12,57,64,13,51,-3.984375,48.45
7,2022-06-13,38,37,6,31,0.900901,31.333333
8,2022-06-14,42,48,8,40,-26.782407,27.144444
9,2022-06-15,42,50,12,38,87.897436,81.948718


In [54]:
from datetime import date
# Show the stall and race-card numbers of every history row that went off on 2022-06-06.
# (To look for missing race-card numbers instead, filter on
# history['RaceCardNumber'].isna(); an element-wise `== None` comparison never matches.)
history.loc[history['Off'].dt.date == date(2022, 6, 6), ['StallNumber', 'RaceCardNumber']]

Unnamed: 0,StallNumber,RaceCardNumber
3462,4.0,2
3463,9.0,4
3464,5.0,1
3465,3.0,3
3466,2.0,8
...,...,...
3952,6.0,1
3953,7.0,4
3954,4.0,6
3955,5.0,2
