# `remove_races_with_zero_winning_time.ipynb`

### Author: Anthony Hein

#### Last updated: 10/19/2021

# Overview:

It was discovered that some races have a recorded winning time of zero. A winning time of zero does not make sense, so these races (and the horses that ran in them) must be removed from the dataset.

---

## Setup

In [1]:
import git
import os
from typing import List
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Resolve the project root as the working directory of the git repository
# that contains the current working directory (parent directories are
# searched). All data paths below are built relative to this root.
BASE_DIR = git.Repo(
    path=os.getcwd(),
    search_parent_directories=True,
).working_dir
BASE_DIR

'/Users/anthonyhein/Desktop/SML310/project'

---

## Load `races_clean_augment_clean.csv`

In [9]:
# Read the races table from the project's CSV directory and preview the
# first five rows.
races_clean_augment_clean = pd.read_csv(
    filepath_or_buffer=f"{BASE_DIR}/data/csv/races_clean_augment_clean.csv",
    low_memory=False,
)
races_clean_augment_clean.head(5)

Unnamed: 0,rid,course,winningTime,metric,ncond,margin,runners,temp,msl,rain,rhum,Station number,datetime
0,302858,Thurles (IRE),277.2,3821.0,1,1.219263,6,2.2,1012.7,0.0,82,4919,1997-01-09 13:15:00
1,291347,Punchestown (IRE),447.2,5229.0,5,1.218049,9,8.1,992.8,0.0,79,3723,1997-02-16 15:40:00
2,377929,Leopardstown (IRE),106.4,1609.0,4,1.204927,5,10.1,996.7,0.0,76,532,1997-05-11 15:00:00
3,275117,Curragh (IRE),125.9,2011.0,4,1.083838,5,15.8,1030.1,0.0,53,3723,1997-05-25 15:35:00
4,66511,Leopardstown (IRE),116.3,1810.0,1,1.077871,5,16.3,1022.9,0.0,53,532,1997-06-02 16:30:00


In [11]:
# Show the (rows, columns) dimensions of the races table before filtering.
races_clean_augment_clean.shape

(19260, 13)

---

## Load `horses_augment_dist.csv`

In [5]:
# Read the per-horse table from the project's CSV directory and preview the
# first five rows.
horses_augment_dist = pd.read_csv(
    filepath_or_buffer=f"{BASE_DIR}/data/csv/horses_augment_dist.csv",
    low_memory=False,
)
horses_augment_dist.head(5)

Unnamed: 0,rid,horseName,age,saddle,decimalPrice,trainerName,jockeyName,position,positionL,dist,outHandicap,RPR,TR,OR,father,mother,gfather,weight
0,302858,Kings Return,6.0,4.0,0.6,W P Mullins,D J Casey,1,,0.0,0.0,102.0,51.591987,79.654604,King's Ride,Browne's Return,Deep Run,73
1,302858,Majestic Red I,6.0,5.0,0.047619,John Hackett,Conor O'Dwyer,2,8,8.0,0.0,94.0,51.591987,79.654604,Long Pond,Courtlough Lady,Giolla Mear,73
2,302858,Clearly Canadian,6.0,2.0,0.166667,D T Hughes,G Cotter,3,1.5,9.5,0.0,92.0,51.591987,79.654604,Nordico,Over The Seas,North Summit,71
3,302858,Bernestic Wonder,8.0,1.0,0.058824,E McNamara,J Old Jones,4,dist,39.5,0.0,71.87665,51.591987,79.654604,Roselier,Miss Reindeer,Reindeer,73
4,302858,Beauty's Pride,5.0,6.0,0.038462,J J Lennon,T Martin,5,dist,69.5,0.0,71.87665,51.591987,79.654604,Noalto,Elena's Beauty,Tarqogan,66


In [6]:
# Show the (rows, columns) dimensions of the horses table before filtering.
horses_augment_dist.shape

(194898, 18)

---

## Remove Races w/ Zero Winning Time

In [12]:
# Count the races whose recorded winning time is exactly zero.
int(races_clean_augment_clean['winningTime'].eq(0).sum())

32

In [14]:
# Keep only the races whose winning time is not zero, then display the
# filtered table.
races_clean_augment_clean = races_clean_augment_clean.loc[
    races_clean_augment_clean['winningTime'].ne(0)
]
races_clean_augment_clean

Unnamed: 0,rid,course,winningTime,metric,ncond,margin,runners,temp,msl,rain,rhum,Station number,datetime
0,302858,Thurles (IRE),277.2,3821.0,1,1.219263,6,2.2,1012.7,0.0,82,4919,1997-01-09 13:15:00
1,291347,Punchestown (IRE),447.2,5229.0,5,1.218049,9,8.1,992.8,0.0,79,3723,1997-02-16 15:40:00
2,377929,Leopardstown (IRE),106.4,1609.0,4,1.204927,5,10.1,996.7,0.0,76,532,1997-05-11 15:00:00
3,275117,Curragh (IRE),125.9,2011.0,4,1.083838,5,15.8,1030.1,0.0,53,3723,1997-05-25 15:35:00
4,66511,Leopardstown (IRE),116.3,1810.0,1,1.077871,5,16.3,1022.9,0.0,53,532,1997-06-02 16:30:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
19255,227342,Cork (IRE),248.2,3218.0,9,1.540875,14,4.0,1003.5,0.4,83,3904,1999-12-12 15:30:00
19256,243638,Down Royal (IRE),252.0,3218.0,9,1.576670,14,2.5,988.8,0.0,93,2437,1999-12-27 15:30:00
19257,44932,Clonmel (IRE),296.2,3620.0,12,1.595269,12,3.5,990.3,0.0,85,3613,1999-12-27 15:35:00
19258,360255,Leopardstown (IRE),253.0,3218.0,11,1.296711,10,2.7,1021.3,0.0,88,532,1999-12-29 15:45:00


In [15]:
# Keep only the horse rows whose race id (`rid`) still appears in the
# filtered races table, then display the result.
has_remaining_race = horses_augment_dist['rid'].isin(
    races_clean_augment_clean['rid']
)
horses_augment_dist = horses_augment_dist.loc[has_remaining_race]
horses_augment_dist

Unnamed: 0,rid,horseName,age,saddle,decimalPrice,trainerName,jockeyName,position,positionL,dist,outHandicap,RPR,TR,OR,father,mother,gfather,weight
0,302858,Kings Return,6.0,4.0,0.600000,W P Mullins,D J Casey,1,,0.00,0.0,102.00000,51.591987,79.654604,King's Ride,Browne's Return,Deep Run,73
1,302858,Majestic Red I,6.0,5.0,0.047619,John Hackett,Conor O'Dwyer,2,8,8.00,0.0,94.00000,51.591987,79.654604,Long Pond,Courtlough Lady,Giolla Mear,73
2,302858,Clearly Canadian,6.0,2.0,0.166667,D T Hughes,G Cotter,3,1.5,9.50,0.0,92.00000,51.591987,79.654604,Nordico,Over The Seas,North Summit,71
3,302858,Bernestic Wonder,8.0,1.0,0.058824,E McNamara,J Old Jones,4,dist,39.50,0.0,71.87665,51.591987,79.654604,Roselier,Miss Reindeer,Reindeer,73
4,302858,Beauty's Pride,5.0,6.0,0.038462,J J Lennon,T Martin,5,dist,69.50,0.0,71.87665,51.591987,79.654604,Noalto,Elena's Beauty,Tarqogan,66
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194893,227139,Old Tim,6.0,5.0,0.142857,Donal Hassett,Mr B Hassett,8,15,65.00,0.0,42.00000,51.591987,79.654604,Poet's Dream I,Settled,Blue Cashmere,73
194894,227139,Our Ling,6.0,12.0,0.111111,C P Donoghue,Philip Dempsey,9,13,78.00,0.0,24.00000,51.591987,79.654604,Vaour,May-Ling,Mississippi,73
194895,227139,Ballinarrid,6.0,2.0,0.111111,Seamus P Murphy,Mr P Fenton,10,0.75,78.75,0.0,28.00000,51.591987,79.654604,John French,Cuckaloo,Master Buck,76
194896,227139,Fountain Pen,9.0,3.0,0.047619,William J Fitzpatrick,Mr P Fahey,11,dist,108.75,0.0,71.87665,51.591987,79.654604,Royal Fountain,Monday's Pet,Menelek,74


## Save Dataframes

In [16]:
# Write the filtered races back to the same CSV path they were loaded from
# (this overwrites the input file); the DataFrame index is not written.
races_clean_augment_clean.to_csv(
    path_or_buf=f"{BASE_DIR}/data/csv/races_clean_augment_clean.csv",
    index=False,
)

In [17]:
# Write the filtered horses back to the same CSV path they were loaded from
# (this overwrites the input file); the DataFrame index is not written.
horses_augment_dist.to_csv(
    path_or_buf=f"{BASE_DIR}/data/csv/horses_augment_dist.csv",
    index=False,
)

---