In [13]:
# %pip install --quiet --upgrade pip 
# %pip install numpy --quiet
# %pip install Pandas --quiet
# %pip install scikit-learn --quiet
# %pip install ipywidgets --quiet

# Jockey Racing Stats Builder #

Using the data derived from the features extracted by the [Feature Analysis](https://github.com/LeeSanderson/RacingData/blob/main/Data/FeatureAnalysis.ipynb) notebook, this notebook creates a database (a CSV file) of jockey statistics.

In [14]:
import numpy as np
import pandas as pd
import math
from abc import ABC, abstractmethod
from datetime import datetime, date

In [15]:
# Load the race features, parse the 'Off' column into datetimes using an
# explicit format, and keep only the rows whose JockeyId is greater than zero.
races = (
    pd.read_csv("Race_Features.csv")
    .assign(Off=lambda df: pd.to_datetime(df["Off"], format="%Y-%m-%d %H:%M:%S"))
    .loc[lambda df: df["JockeyId"] > 0]
)

In [16]:
# Order each jockey's races newest-first, so the first row seen for every
# JockeyId is that jockey's most recent race.
races = races.sort_values(["JockeyId", "Off"], ascending=[True, False])

# Keep exactly one *whole row* per jockey: the most recent race.
# groupby("JockeyId").first() is deliberately not used here: it returns the
# first non-null value of each column independently, so a NaN in the latest
# race would be silently replaced by a value from an older race and the
# resulting row would mix data from different races.
jockey_races = races.drop_duplicates(subset="JockeyId", keep="first").reset_index(drop=True)

# Preserve the previous column layout: JockeyId first, then the remaining
# columns in their original order.
jockey_races = jockey_races[["JockeyId"] + [col for col in jockey_races.columns if col != "JockeyId"]]

In [17]:
# Display the column labels of the one-row-per-jockey frame.
jockey_races.columns

Index(['JockeyId', 'RaceId', 'CourseId', 'RaceType', 'Off', 'DecimalOdds',
       'OfficialRating', 'RacingPostRating', 'TopSpeedRating',
       'DistanceInMeters', 'Going', 'Surface', 'HorseId', 'HorseName',
       'JockeyName', 'Age', 'HeadGear', 'RaceCardNumber', 'StallNumber',
       'WeightInPounds', 'FinishingPosition', 'OverallBeatenDistance',
       'RaceTimeInSeconds', 'Wins', 'Surface_AllWeather', 'Surface_Dirt',
       'Surface_Turf', 'Going_Firm', 'Going_Good', 'Going_Good_To_Firm',
       'Going_Good_To_Soft', 'Going_Heavy', 'Going_Soft', 'RaceType_Flat',
       'RaceType_Hurdle', 'RaceType_Other', 'RaceType_SteepleChase', 'Speed',
       'HorseCount', 'KnownHorseAndJockey', 'NumberOfPriorRaces',
       'LastRaceGoing', 'LastRaceSurface', 'LastRaceDistanceInMeters',
       'LastRaceWeightInPounds', 'LastRaceSpeed', 'DaysRested',
       'LastRaceDecimalOdds', 'LastRaceOfficialRating',
       'LastRaceRacingPostRating', 'LastRaceTopSpeedRating',
       'LastRaceAvgRelFinishi

In [18]:
# Turn the stored rates back into absolute counts by multiplying each rate by
# JockeyNumberOfPriorRaces. A missing product is treated as a count of zero.
jockey_races = jockey_races.assign(
    NumberOfWins=jockey_races["JockeyWinPercentage"]
    .mul(jockey_races["JockeyNumberOfPriorRaces"])
    .fillna(0),
    NumberOfTop3Finishes=jockey_races["JockeyTop3Percentage"]
    .mul(jockey_races["JockeyNumberOfPriorRaces"])
    .fillna(0),
)

In [19]:
# Add the selected race's own result to the counts: +1 win when
# FinishingPosition equals 1, +1 top-3 finish when FinishingPosition is below 4.
# NOTE: this updates the count columns in place, so re-running the cell adds
# the same race again.
jockey_races["NumberOfWins"] = jockey_races["NumberOfWins"].add(
    jockey_races["FinishingPosition"].eq(1).astype(int)
)
jockey_races["NumberOfTop3Finishes"] = jockey_races["NumberOfTop3Finishes"].add(
    jockey_races["FinishingPosition"].lt(4).astype(int)
)

In [20]:
# Fold the selected race into the per-jockey running statistics.
# The steps are order-dependent: the running mean must be computed with the
# count *before* it is incremented, and the rates with the count *after*.
# NOTE: the count column is overwritten, so re-running the cell increments it again.
jockey_races = (
    jockey_races
    # A missing prior-race count is treated as zero.
    .assign(JockeyNumberOfPriorRaces=lambda df: df["JockeyNumberOfPriorRaces"].fillna(0))
    # Running-mean update: (old_mean * n + latest) / (n + 1), where latest is
    # FinishingPosition / HorseCount. Where the result is NaN, fall back to the
    # latest value alone.
    .assign(
        AvgRelFinishingPosition=lambda df: (
            df["JockeyAvgRelFinishingPosition"]
            .mul(df["JockeyNumberOfPriorRaces"])
            .add(df["FinishingPosition"].div(df["HorseCount"]))
            .div(df["JockeyNumberOfPriorRaces"].add(1))
            .fillna(df["FinishingPosition"].div(df["HorseCount"]))
        )
    )
    # Count the selected race itself.
    .assign(JockeyNumberOfPriorRaces=lambda df: df["JockeyNumberOfPriorRaces"].add(1))
    # Recompute both rates against the incremented race count.
    .assign(
        JockeyWinPercentage=lambda df: df["NumberOfWins"].div(df["JockeyNumberOfPriorRaces"]),
        JockeyTop3Percentage=lambda df: df["NumberOfTop3Finishes"].div(df["JockeyNumberOfPriorRaces"]),
    )
)


In [21]:
# jockey_races[["JockeyId", "JockeyName", "JockeyWinPercentage", "JockeyTop3Percentage", "AvgRelFinishingPosition", "JockeyAvgRelFinishingPosition", "FinishingPosition", "HorseCount", "JockeyNumberOfPriorRaces"]]

In [22]:
# Build the export frame: pick the per-jockey statistic columns, then rename
# 'Off' to 'LastOff' and the freshly computed 'AvgRelFinishingPosition' to
# 'JockeyAvgRelFinishingPosition'.
jockey_stats = (
    jockey_races
    .loc[
        :,
        [
            "JockeyId",
            "Off",
            "JockeyNumberOfPriorRaces",
            "JockeyWinPercentage",
            "JockeyTop3Percentage",
            "AvgRelFinishingPosition",
        ],
    ]
    .rename(
        columns={
            "Off": "LastOff",
            "AvgRelFinishingPosition": "JockeyAvgRelFinishingPosition",
        }
    )
)

In [23]:
# Write the per-jockey statistics to CSV, leaving out the row index.
jockey_stats.to_csv(
    path_or_buf="Jockey_Stats.csv",
    index=False,
)