# `create_races_with_entries.ipynb`

### Author: Anthony Hein

#### Last updated: 9/19/2021

# Overview:

Combines the `horses_all.csv` and `races_all.csv` files into one file `races_with_entries.csv` which has:

- All columns from `races_all.csv`.
- Let $x$ be the number of columns in `horses_all.csv` that do not intersect with `races_all.csv`. Let $y$ be the maximum number of horses in any race. Then, the $x$ columns from `horses_x.csv` appear $y$ times, with prefixes `horse_1_`, `horse_2_`, ... , `horse_y_`. For a race that has fewer than $y$ horses, columns are filled with `null` values.

---

## Setup

In [269]:
import git
import os
from typing import List
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
BASE_DIR = git.Repo(os.getcwd(), search_parent_directories=True).working_dir
BASE_DIR

'/Users/anthonyhein/Desktop/SML310/project'

## Load `horses_all.csv`

In [3]:
horses_all = pd.read_csv(f"{BASE_DIR}/data/csv/horses_all.csv", low_memory=False) 
horses_all.head()

Unnamed: 0,rid,horseName,age,saddle,decimalPrice,isFav,trainerName,jockeyName,position,positionL,...,TR,OR,father,mother,gfather,runners,margin,weight,res_win,res_place
0,267255,Going For Broke,3.0,4.0,0.1,0,P C Haslam,Seb Sanders,1,,...,62.0,62.0,Simply Great,Empty Purse,Pennine Walk,6,1.168254,58,1.0,1.0
1,267255,Pinchincha,3.0,3.0,0.266667,0,Dave Morris,Tony Clark,2,4.0,...,56.0,65.0,Priolo,Western Heights,Shirley Heights,6,1.168254,60,0.0,1.0
2,267255,Skelton Sovereign,3.0,5.0,0.142857,0,Reg Hollinshead,D Griffiths,3,3.0,...,40.0,60.0,Contract Law,Mrs Lucky,Royal Match,6,1.168254,55,0.0,0.0
3,267255,Fast Spin,3.0,6.0,0.380952,1,David Barron,Tony Culhane,4,7.0,...,30.0,59.0,Formidable I,Topwinder,Topsider,6,1.168254,57,0.0,0.0
4,267255,As-Is,3.0,2.0,0.166667,0,Mark Johnston,J Weaver,5,7.0,...,21.0,65.0,Lomond,Capriati I,Diesis,6,1.168254,60,0.0,0.0


In [4]:
horses_all.shape

(4107315, 27)

---

## Load `races_all.csv`

In [5]:
races_all = pd.read_csv(f"{BASE_DIR}/data/csv/races_all.csv", low_memory=False) 
races_all.head()

Unnamed: 0,rid,course,time,date,title,rclass,band,ages,distance,condition,hurdles,prizes,winningTime,prize,metric,countryCode,ncond,class
0,267255,Southwell (AW),03:40,97/01/01,New Year Handicap Class E,Class 5,0-70,3yo,1m,Standard,,"[2752.25, 833.0, 406.5, 193.25]",106.9,4184.0,1609.0,GB,0,5
1,297570,Southwell (AW),12:35,97/01/01,Resolution Claiming Stakes Class F (Div I),Class 6,,4yo+,7f,Standard,,"[1944.0, 544.0, 264.0]",91.0,2752.0,1407.0,GB,0,6
2,334421,Southwell (AW),01:05,97/01/01,One Too Many Median Auction Maiden Apprentices...,Class 6,,4-6yo,1m3f,Standard,,"[2502.0, 702.0, 342.0]",150.7,3546.0,2212.0,GB,0,6
3,366304,Southwell (AW),03:10,97/01/01,Morning Call Selling Stakes Class G Southwell ...,Class 6,,3yo,1m,Standard,,"[2189.0, 614.0, 299.0]",108.6,3102.0,1609.0,GB,0,6
4,13063,Southwell (AW),02:40,97/01/01,Thinking &amp; Drinking Handicap Class E,Class 5,0-70,4yo+,2m½f,Standard,,"[2726.25, 825.0, 402.5, 191.25]",231.4,4144.0,3318.5,GB,0,5


In [6]:
races_all.shape

(396572, 18)

---

## Find Intersection of Columns

In [12]:
set(races_all.columns).intersection(set(horses_all.columns))

{'rid'}

Luckily, the intersection is only the `rid` (which we need to join these datasets anyways), so no additional processing occurs here.

---

## Get Horses in Race

In [14]:
def get_horses_in_race(df: pd.core.frame.DataFrame, rid: int) -> pd.core.frame.DataFrame:
    """
    Given a race id `rid`, return indices of all horses in the
    dataframe `df` that raced in this race.
    """
    return df[df['rid'] == rid]

In [34]:
df = get_horses_in_race(horses_all, 267255)

assert len(df) == int(df['runners'].iloc[0])
assert abs(sum(df['decimalPrice']) - df['margin'].iloc[0]) < 10e-2

df

Unnamed: 0,rid,horseName,age,saddle,decimalPrice,isFav,trainerName,jockeyName,position,positionL,...,TR,OR,father,mother,gfather,runners,margin,weight,res_win,res_place
0,267255,Going For Broke,3.0,4.0,0.1,0,P C Haslam,Seb Sanders,1,,...,62.0,62.0,Simply Great,Empty Purse,Pennine Walk,6,1.168254,58,1.0,1.0
1,267255,Pinchincha,3.0,3.0,0.266667,0,Dave Morris,Tony Clark,2,4.0,...,56.0,65.0,Priolo,Western Heights,Shirley Heights,6,1.168254,60,0.0,1.0
2,267255,Skelton Sovereign,3.0,5.0,0.142857,0,Reg Hollinshead,D Griffiths,3,3.0,...,40.0,60.0,Contract Law,Mrs Lucky,Royal Match,6,1.168254,55,0.0,0.0
3,267255,Fast Spin,3.0,6.0,0.380952,1,David Barron,Tony Culhane,4,7.0,...,30.0,59.0,Formidable I,Topwinder,Topsider,6,1.168254,57,0.0,0.0
4,267255,As-Is,3.0,2.0,0.166667,0,Mark Johnston,J Weaver,5,7.0,...,21.0,65.0,Lomond,Capriati I,Diesis,6,1.168254,60,0.0,0.0
5,267255,Marsh Marigold,3.0,1.0,0.111111,0,Martyn Meade,Fergus Sweeney,6,0.5,...,15.0,65.0,Tina's Pet,Pulga,Blakeney,6,1.168254,58,0.0,0.0


In [33]:
df = get_horses_in_race(horses_all, 297570)

assert len(df) == int(df['runners'].iloc[0])
assert abs(sum(df['decimalPrice']) - df['margin'].iloc[0]) < 10e-2

df

Unnamed: 0,rid,horseName,age,saddle,decimalPrice,isFav,trainerName,jockeyName,position,positionL,...,TR,OR,father,mother,gfather,runners,margin,weight,res_win,res_place
6,297570,Anonym,5.0,2.0,0.058824,0,David Nicholls,J Bramhill,1,,...,69.0,,Nashamaa,Bonny Bertha,Capistrano I,11,1.256241,55,1.0,1.0
7,297570,Elton Ledger,8.0,4.0,0.285714,0,Mrs N Macauley,Seb Sanders,2,1,...,70.0,,Cyrano De Bergerac,Princess Of Nashua,Crowned Prince,11,1.256241,56,0.0,1.0
8,297570,Sea Devil,11.0,10.0,0.076923,0,M J Camacho,L Charnock,3,.5,...,58.0,,Absalom,Miss Poinciana,Averof,11,1.256241,52,0.0,1.0
9,297570,Havana Miss,5.0,11.0,0.019608,0,Bryn Palling,Martin Dwyer,4,4,...,43.0,,Cigar,Miss Patdonna,Starch Reduced,11,1.256241,49,0.0,0.0
10,297570,Rambo Waltzer,5.0,5.0,0.111111,0,David Nicholls,Alex Greaves,5,2,...,55.0,,Rambo Dancer,Vindictive Lady,Foolish Pleasure,11,1.256241,56,0.0,0.0
11,297570,Little Ibnr,6.0,9.0,0.083333,0,David Evans,Jimmy Quinn,6,nk,...,46.0,,Formidable I,Zalatia,Music Boy,11,1.256241,53,0.0,0.0
12,297570,Standown,4.0,7.0,0.066667,0,J Berry,P Roberts,7,7,...,29.0,,Reprimand,Ashdown,Pharly,11,1.256241,52,0.0,0.0
13,297570,Sally Armstrong,4.0,3.0,0.047619,0,C W Thornton,Dean McKeown,8,.5,...,39.0,,Batshoof,Salinity,Standaan,11,1.256241,58,0.0,0.0
14,297570,High Premium,9.0,1.0,0.4,1,Richard Fahey,Tony Culhane,9,hd,...,44.0,,Forzando,High Halo,High Top,11,1.256241,60,0.0,0.0
15,297570,Santella Katie,4.0,8.0,0.058824,0,Linda Stubbs,Simon Whitworth,10,2,...,26.0,,Anshan,Mary Bankes,Northern Baby,11,1.256241,54,0.0,0.0


---

## Pad Horses in Race to `MAX_HORSES`

In [188]:
MAX_HORSES = max(horses_all['runners'])
MAX_HORSES

40

As shown, the true value of `MAX_HORSES` is 40. However, this is _way_ too large for what we are trying to achieve. This is why we must instaed choose to observe a subset of the races which have fewer than 40 horses run at a time to make the computation tractable. The work to select this size is done in the notebook `understand_number_of_horses_per_race.ipynb`. 

In [189]:
MAX_HORSES = 14
MAX_HORSES

14

In [190]:
EMPTY_ROW = {c: None for c in horses_all.columns}
EMPTY_ROW

{'rid': None,
 'horseName': None,
 'age': None,
 'saddle': None,
 'decimalPrice': None,
 'isFav': None,
 'trainerName': None,
 'jockeyName': None,
 'position': None,
 'positionL': None,
 'dist': None,
 'weightSt': None,
 'weightLb': None,
 'overWeight': None,
 'outHandicap': None,
 'headGear': None,
 'RPR': None,
 'TR': None,
 'OR': None,
 'father': None,
 'mother': None,
 'gfather': None,
 'runners': None,
 'margin': None,
 'weight': None,
 'res_win': None,
 'res_place': None}

In [191]:
def pad_horses_in_race(df: pd.core.frame.DataFrame, max_horses: int = MAX_HORSES) -> pd.core.frame.DataFrame:
    """
    Given a dataframe `df` that is the result of a call to `get_horses_in_race`,
    append rows of `EMPTY_ROW` until this `df` is length `max_horses`. But, all
    rows must have the `rid` feature set for later merging.
    """
    rid = df.iloc[0, df.columns.get_loc('rid')]
    
    for _ in range(max_horses - len(df)):
        df = df.append(pd.Series(EMPTY_ROW), ignore_index=True)
        df.iloc[-1, df.columns.get_loc('rid')] = rid

    return df

In [192]:
df = pad_horses_in_race(get_horses_in_race(horses_all, 267255), 8)

assert len(df) == 8
assert df.iloc[6, df.columns.get_loc('rid')] == df.iloc[0, df.columns.get_loc('rid')]
assert df.iloc[7, df.columns.get_loc('rid')] == df.iloc[0, df.columns.get_loc('rid')]
assert np.any(df.iloc[6].isna())
assert np.any(df.iloc[7].isna())

df

Unnamed: 0,rid,horseName,age,saddle,decimalPrice,isFav,trainerName,jockeyName,position,positionL,...,TR,OR,father,mother,gfather,runners,margin,weight,res_win,res_place
0,267255,Going For Broke,3.0,4.0,0.1,0.0,P C Haslam,Seb Sanders,1.0,,...,62.0,62.0,Simply Great,Empty Purse,Pennine Walk,6.0,1.168254,58.0,1.0,1.0
1,267255,Pinchincha,3.0,3.0,0.266667,0.0,Dave Morris,Tony Clark,2.0,4.0,...,56.0,65.0,Priolo,Western Heights,Shirley Heights,6.0,1.168254,60.0,0.0,1.0
2,267255,Skelton Sovereign,3.0,5.0,0.142857,0.0,Reg Hollinshead,D Griffiths,3.0,3.0,...,40.0,60.0,Contract Law,Mrs Lucky,Royal Match,6.0,1.168254,55.0,0.0,0.0
3,267255,Fast Spin,3.0,6.0,0.380952,1.0,David Barron,Tony Culhane,4.0,7.0,...,30.0,59.0,Formidable I,Topwinder,Topsider,6.0,1.168254,57.0,0.0,0.0
4,267255,As-Is,3.0,2.0,0.166667,0.0,Mark Johnston,J Weaver,5.0,7.0,...,21.0,65.0,Lomond,Capriati I,Diesis,6.0,1.168254,60.0,0.0,0.0
5,267255,Marsh Marigold,3.0,1.0,0.111111,0.0,Martyn Meade,Fergus Sweeney,6.0,0.5,...,15.0,65.0,Tina's Pet,Pulga,Blakeney,6.0,1.168254,58.0,0.0,0.0
6,267255,,,,,,,,,,...,,,,,,,,,,
7,267255,,,,,,,,,,...,,,,,,,,,,


In [193]:
df = pad_horses_in_race(get_horses_in_race(horses_all, 297570), 14)

assert len(df) == 14
assert df.iloc[11, df.columns.get_loc('rid')] == df.iloc[0, df.columns.get_loc('rid')]
assert df.iloc[12, df.columns.get_loc('rid')] == df.iloc[0, df.columns.get_loc('rid')]
assert df.iloc[13, df.columns.get_loc('rid')] == df.iloc[0, df.columns.get_loc('rid')]
assert np.any(df.iloc[11].isna())
assert np.any(df.iloc[12].isna())
assert np.any(df.iloc[13].isna())

df

Unnamed: 0,rid,horseName,age,saddle,decimalPrice,isFav,trainerName,jockeyName,position,positionL,...,TR,OR,father,mother,gfather,runners,margin,weight,res_win,res_place
0,297570,Anonym,5.0,2.0,0.058824,0.0,David Nicholls,J Bramhill,1.0,,...,69.0,,Nashamaa,Bonny Bertha,Capistrano I,11.0,1.256241,55.0,1.0,1.0
1,297570,Elton Ledger,8.0,4.0,0.285714,0.0,Mrs N Macauley,Seb Sanders,2.0,1,...,70.0,,Cyrano De Bergerac,Princess Of Nashua,Crowned Prince,11.0,1.256241,56.0,0.0,1.0
2,297570,Sea Devil,11.0,10.0,0.076923,0.0,M J Camacho,L Charnock,3.0,.5,...,58.0,,Absalom,Miss Poinciana,Averof,11.0,1.256241,52.0,0.0,1.0
3,297570,Havana Miss,5.0,11.0,0.019608,0.0,Bryn Palling,Martin Dwyer,4.0,4,...,43.0,,Cigar,Miss Patdonna,Starch Reduced,11.0,1.256241,49.0,0.0,0.0
4,297570,Rambo Waltzer,5.0,5.0,0.111111,0.0,David Nicholls,Alex Greaves,5.0,2,...,55.0,,Rambo Dancer,Vindictive Lady,Foolish Pleasure,11.0,1.256241,56.0,0.0,0.0
5,297570,Little Ibnr,6.0,9.0,0.083333,0.0,David Evans,Jimmy Quinn,6.0,nk,...,46.0,,Formidable I,Zalatia,Music Boy,11.0,1.256241,53.0,0.0,0.0
6,297570,Standown,4.0,7.0,0.066667,0.0,J Berry,P Roberts,7.0,7,...,29.0,,Reprimand,Ashdown,Pharly,11.0,1.256241,52.0,0.0,0.0
7,297570,Sally Armstrong,4.0,3.0,0.047619,0.0,C W Thornton,Dean McKeown,8.0,.5,...,39.0,,Batshoof,Salinity,Standaan,11.0,1.256241,58.0,0.0,0.0
8,297570,High Premium,9.0,1.0,0.4,1.0,Richard Fahey,Tony Culhane,9.0,hd,...,44.0,,Forzando,High Halo,High Top,11.0,1.256241,60.0,0.0,0.0
9,297570,Santella Katie,4.0,8.0,0.058824,0.0,Linda Stubbs,Simon Whitworth,10.0,2,...,26.0,,Anshan,Mary Bankes,Northern Baby,11.0,1.256241,54.0,0.0,0.0


---

## Rename Columns in a Dataframe w/ Prefix

In [194]:
def rename_column(horse_num: int, column: str, max_horses: int = MAX_HORSES) -> str:
    """
    Given `horse_num`, which is the number of the horse in a given race, a `column`,
    and the maximum number of horses in a single race `max_horses`, return the renamed
    column for use in a new dataframe.
    """
    fmt_str = f"horse_{{i:0{len(str(max_horses))}d}}_{{c}}"
    
    if column == "rid":
        return column
    else:
        return fmt_str.format(i=horse_num, c=column)

In [195]:
df = horses_all.iloc[[0]].rename(columns= lambda c: rename_column(1, c, 8))
df

Unnamed: 0,rid,horse_1_horseName,horse_1_age,horse_1_saddle,horse_1_decimalPrice,horse_1_isFav,horse_1_trainerName,horse_1_jockeyName,horse_1_position,horse_1_positionL,...,horse_1_TR,horse_1_OR,horse_1_father,horse_1_mother,horse_1_gfather,horse_1_runners,horse_1_margin,horse_1_weight,horse_1_res_win,horse_1_res_place
0,267255,Going For Broke,3.0,4.0,0.1,0,P C Haslam,Seb Sanders,1,,...,62.0,62.0,Simply Great,Empty Purse,Pennine Walk,6,1.168254,58,1.0,1.0


In [196]:
df = horses_all.iloc[[1]].rename(columns= lambda c: rename_column(2, c, 12))
df

Unnamed: 0,rid,horse_02_horseName,horse_02_age,horse_02_saddle,horse_02_decimalPrice,horse_02_isFav,horse_02_trainerName,horse_02_jockeyName,horse_02_position,horse_02_positionL,...,horse_02_TR,horse_02_OR,horse_02_father,horse_02_mother,horse_02_gfather,horse_02_runners,horse_02_margin,horse_02_weight,horse_02_res_win,horse_02_res_place
1,267255,Pinchincha,3.0,3.0,0.266667,0,Dave Morris,Tony Clark,2,4,...,56.0,65.0,Priolo,Western Heights,Shirley Heights,6,1.168254,60,0.0,1.0


In [197]:
df = horses_all.iloc[[11]].rename(columns= lambda c: rename_column(12, c, 12))
df

Unnamed: 0,rid,horse_12_horseName,horse_12_age,horse_12_saddle,horse_12_decimalPrice,horse_12_isFav,horse_12_trainerName,horse_12_jockeyName,horse_12_position,horse_12_positionL,...,horse_12_TR,horse_12_OR,horse_12_father,horse_12_mother,horse_12_gfather,horse_12_runners,horse_12_margin,horse_12_weight,horse_12_res_win,horse_12_res_place
11,297570,Little Ibnr,6.0,9.0,0.083333,0,David Evans,Jimmy Quinn,6,nk,...,46.0,,Formidable I,Zalatia,Music Boy,11,1.256241,53,0.0,0.0


---

## Reshape Horses in Race

In [198]:
def reshape_horses_in_race(df: pd.core.frame.DataFrame, max_horses: int = MAX_HORSES) -> pd.core.frame.DataFrame:
    """
    Given a dataframe `df` that is the result of a call to `get_horses_in_race` and
    has been padded to the necessary number of rows, reshape this dataframe to be one
    long row containing columns which are the same as before except with prefixes 
    `horse_1_`, `horse_2_`, ... , `horse_{MAX_HORSES}_`.
    """    

    acc = df.iloc[[0]].rename(columns=lambda c: rename_column(1, c, max_horses))
    
    for i in range(1, max_horses):
        addition = df.iloc[[i]].rename(columns=lambda c: rename_column(i+1, c, max_horses))
        acc = pd.merge(left=acc, right=addition, left_on='rid', right_on='rid')
        
    return acc

In [199]:
df = reshape_horses_in_race(pad_horses_in_race(get_horses_in_race(horses_all, 267255), 8), 8)

assert len(df.columns) == 8 * (len(horses_all.columns) - 1) + 1

df

Unnamed: 0,rid,horse_1_horseName,horse_1_age,horse_1_saddle,horse_1_decimalPrice,horse_1_isFav,horse_1_trainerName,horse_1_jockeyName,horse_1_position,horse_1_positionL,...,horse_8_TR,horse_8_OR,horse_8_father,horse_8_mother,horse_8_gfather,horse_8_runners,horse_8_margin,horse_8_weight,horse_8_res_win,horse_8_res_place
0,267255,Going For Broke,3.0,4.0,0.1,0,P C Haslam,Seb Sanders,1,,...,,,,,,,,,,


In [200]:
df = reshape_horses_in_race(pad_horses_in_race(get_horses_in_race(horses_all, 297570), 14), 14)

assert len(df.columns) == 14 * (len(horses_all.columns) - 1) + 1

df

Unnamed: 0,rid,horse_01_horseName,horse_01_age,horse_01_saddle,horse_01_decimalPrice,horse_01_isFav,horse_01_trainerName,horse_01_jockeyName,horse_01_position,horse_01_positionL,...,horse_14_TR,horse_14_OR,horse_14_father,horse_14_mother,horse_14_gfather,horse_14_runners,horse_14_margin,horse_14_weight,horse_14_res_win,horse_14_res_place
0,297570,Anonym,5.0,2.0,0.058824,0,David Nicholls,J Bramhill,1,,...,,,,,,,,,,


---

## Select Races w/ `≤ MAX_HORSES` Horses

In [270]:
# WARNING: may be correct, but too slow in practice, must make a map from rid to
# runners then use that instead of finding runners per ever id (which involves search)

def get_num_horses_in_race(df: pd.core.frame.DataFrame, rids: List[int]) -> int:
    """
    Get the number of horses / runners in the races with ids `rids`, where
    this information is found using the dataframe `df`.
    """
    try:
        return [df['runners'][df['rid'] == rid].iloc[0] for rid in tqdm(rids)]
    except:
        print(f"Cannot find any horses to have participated in {rid}")

In [None]:
# WARNING: too slow, about 30 minutes

get_num_horses_in_race(horses_all, list(races_all['rid']))

In [274]:
# takes about 3 minutes

d = {}
for _, row in tqdm(horses_all.iterrows()):
    d[row['rid']] = row['runners']

4107315it [02:47, 24521.52it/s]


In [276]:
idxs = [
    True if d[rid] <= MAX_HORSES
    else False
    for rid
    in races_all['rid']
]

In [277]:
races_leq_max_horses = races_all[idxs]
races_leq_max_horses.head()

Unnamed: 0,rid,course,time,date,title,rclass,band,ages,distance,condition,hurdles,prizes,winningTime,prize,metric,countryCode,ncond,class
0,267255,Southwell (AW),03:40,97/01/01,New Year Handicap Class E,Class 5,0-70,3yo,1m,Standard,,"[2752.25, 833.0, 406.5, 193.25]",106.9,4184.0,1609.0,GB,0,5
1,297570,Southwell (AW),12:35,97/01/01,Resolution Claiming Stakes Class F (Div I),Class 6,,4yo+,7f,Standard,,"[1944.0, 544.0, 264.0]",91.0,2752.0,1407.0,GB,0,6
2,334421,Southwell (AW),01:05,97/01/01,One Too Many Median Auction Maiden Apprentices...,Class 6,,4-6yo,1m3f,Standard,,"[2502.0, 702.0, 342.0]",150.7,3546.0,2212.0,GB,0,6
3,366304,Southwell (AW),03:10,97/01/01,Morning Call Selling Stakes Class G Southwell ...,Class 6,,3yo,1m,Standard,,"[2189.0, 614.0, 299.0]",108.6,3102.0,1609.0,GB,0,6
4,13063,Southwell (AW),02:40,97/01/01,Thinking &amp; Drinking Handicap Class E,Class 5,0-70,4yo+,2m½f,Standard,,"[2726.25, 825.0, 402.5, 191.25]",231.4,4144.0,3318.5,GB,0,5


In [278]:
len(races_leq_max_horses)

332590

In [279]:
len(races_leq_max_horses) / len(races_all)

0.8416037005359501

---

## Append Horses in Race for All Races

In [282]:
# WARNING: do not run for large values of MAX_HORSES
# even with MAX_HORSES = 14 this is still a very long operation

dfs = [
    reshape_horses_in_race(pad_horses_in_race(get_horses_in_race(horses_all, rid)))
    for rid
    in tqdm(races_leq_max_horses['rid'])
]
dfs.head()

  0%|▏                                                                                                                                    | 492/332590 [00:55<11:08:33,  8.28it/s]

KeyboardInterrupt: 

---

## Join Race to Horses in the Race

In [None]:
def join_race_to_horses(race_df)

---