# `extreme_weather_conditions_facilitate_horse_failure.ipynb`

### Author: Anthony Hein

#### Last updated: 11/8/2021

# Overview:

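This notebook investigates whether extreme weather conditions facilitate horse failure (i.e. a non-finishing horse). Races run in extreme conditions have a slightly higher mean number of non-finishing horses, but the difference is not statistically significant (Welch's t-test, p ≈ 0.14).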

---

## Setup

In [1]:
from datetime import datetime
import git
import os
import re
from typing import List
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

sns.set_theme(style="whitegrid")

In [2]:
# Locate the enclosing git repository by searching upward from the current
# working directory, and use its working tree as the base for all data paths.
repo = git.Repo(os.getcwd(), search_parent_directories=True)
BASE_DIR = repo.working_dir
BASE_DIR

'/Users/anthonyhein/Desktop/SML310/project'

---

## Load `races_featurized.csv`

In [3]:
# One row per race; low_memory=False makes pandas read the file in a single
# pass so column dtypes are inferred from the whole column.
races_csv = f"{BASE_DIR}/data/streamline/races_featurized.csv"
races_featurized = pd.read_csv(races_csv, low_memory=False)
races_featurized.head()

Unnamed: 0,rid,course,title,winningTime,metric,ncond,class,runners,margin,1st_place_rank_in_odds,...,pressure_level_2,pressure_level_3,pressure_level_4,is_raining,rhum_level_0,rhum_level_1,rhum_level_2,rhum_level_3,rhum_level_4,entropy of odds
0,302858,Thurles,Liffey Maiden Hurdle (Div 1),277.2,3821.0,1,0,6,1.219263,1,...,0,1,0,0,0,0,0,0,1,1.350363
1,291347,Punchestown,Ericsson G.S.M. Grand National Trial Handicap ...,447.2,5229.0,5,0,9,1.218049,4,...,1,0,0,1,0,0,0,0,1,1.995151
2,75447,Listowel,Ballybunion E.B.F. Beginners S'chase,318.4,3620.0,5,0,8,1.27732,3,...,1,0,0,0,0,0,0,1,0,1.786421
3,358038,Punchestown,Quinns Of Baltinglass Chase (La Touche) (Cross...,533.9,6637.0,1,0,10,1.286595,1,...,0,1,0,0,0,0,0,1,0,1.611062
4,89211,Tipperary,Topaz Sprint Stakes (Listed),59.9,1005.0,4,0,5,1.217043,4,...,1,0,0,0,0,0,1,0,0,1.254374


In [4]:
# Sanity check: (number of rows, number of columns) of the loaded frame.
races_featurized.shape

(20201, 129)

In [5]:
# Show every column name as a plain Python list.
races_featurized.columns.tolist()

['rid',
 'course',
 'title',
 'winningTime',
 'metric',
 'ncond',
 'class',
 'runners',
 'margin',
 '1st_place_rank_in_odds',
 '2nd_place_rank_in_odds',
 '3rd_place_rank_in_odds',
 '1st_rank_in_odds_place',
 '2nd_rank_in_odds_place',
 '3rd_rank_in_odds_place',
 'placeAvailable',
 'showAvailable',
 'favoriteWon',
 'favoritePlaced',
 'favoriteShowed',
 'lat',
 'lng',
 'datetime',
 'station no',
 'station name',
 'station lat',
 'station lng',
 'dist to station',
 'station reading date',
 'temp',
 'msl',
 'rain',
 'rhum',
 'station reading timedelta',
 'course__Ballinrobe',
 'course__Bellewstown',
 'course__Clonmel',
 'course__Cork',
 'course__Curragh',
 'course__Down Royal',
 'course__Downpatrick',
 'course__Dundalk',
 'course__Fairyhouse',
 'course__Galway',
 'course__Gowran Park',
 'course__Killarney',
 'course__Leopardstown',
 'course__Limerick',
 'course__Listowel',
 'course__Naas',
 'course__Navan',
 'course__Other',
 'course__Punchestown',
 'course__Roscommon',
 'course__Sligo',
 '

---

## Load `horses_selected_trimmed_clean.csv`

In [6]:
# One row per (race, horse) entry; low_memory=False makes pandas read the file
# in a single pass so column dtypes are inferred from the whole column.
horses_csv = f"{BASE_DIR}/data/streamline/horses_selected_trimmed_clean.csv"
horses_clean = pd.read_csv(horses_csv, low_memory=False)
horses_clean.head()

Unnamed: 0,rid,horseName,age,saddle,decimalPrice,isFav,trainerName,jockeyName,position,positionL,...,RPR,TR,OR,father,mother,gfather,weight,res_win,res_place,res_show
0,302858,Kings Return,6.0,4.0,0.6,1,W P Mullins,D J Casey,1,0,...,102.0,,,King's Ride,Browne's Return,Deep Run,73,1,1,0
1,302858,Majestic Red I,6.0,5.0,0.047619,0,John Hackett,Conor O'Dwyer,2,8,...,94.0,,,Long Pond,Courtlough Lady,Giolla Mear,73,0,1,0
2,302858,Clearly Canadian,6.0,2.0,0.166667,0,D T Hughes,G Cotter,3,1.5,...,92.0,,,Nordico,Over The Seas,North Summit,71,0,0,0
3,302858,Bernestic Wonder,8.0,1.0,0.058824,0,E McNamara,J Old Jones,4,dist,...,,,,Roselier,Miss Reindeer,Reindeer,73,0,0,0
4,302858,Beauty's Pride,5.0,6.0,0.038462,0,J J Lennon,T Martin,5,dist,...,,,,Noalto,Elena's Beauty,Tarqogan,66,0,0,0


In [7]:
# Sanity check: (number of rows, number of columns) of the loaded frame.
horses_clean.shape

(202304, 22)

## Horse Failure

In [8]:
def has_non_finishing_horse(row: pd.Series,
                            horses: pd.DataFrame = None,
                            non_finish_position: int = 40) -> int:
    """Count the horses that did not finish the race described by `row`.

    Despite the `has_` prefix, the result is a count rather than a boolean:
    0 means every horse finished, and any positive value is the number of
    non-finishers. It is therefore still truthy exactly when the race has at
    least one non-finishing horse.

    Parameters
    ----------
    row
        A single race record that supports `row['rid']`, e.g. the Series
        that `DataFrame.apply(..., axis=1)` passes for each race.
    horses
        Per-horse results with `rid` and `position` columns. When omitted,
        the notebook-level `horses_clean` frame is used.
    non_finish_position
        Value of `position` that this notebook treats as "did not finish"
        (40 by default).

    Returns
    -------
    int
        Number of rows in `horses` that belong to the race and carry the
        non-finisher position.
    """
    frame = horses_clean if horses is None else horses

    # Summing the boolean mask counts the matches without materialising a
    # filtered copy of the frame just to take its length.
    did_not_finish = (
        (frame['rid'] == row['rid']) &
        (frame['position'] == non_finish_position)
    )
    return int(did_not_finish.sum())

In [9]:
# Count non-finishers (position == 40) per race in a single grouped pass.
# The row-wise `apply(has_non_finishing_horse, axis=1)` rescans every row of
# `horses_clean` once per race, which is quadratic in practice.
non_finisher_counts = (
    horses_clean.loc[horses_clean['position'] == 40]
    .groupby('rid')
    .size()
)

# Races without any non-finisher are absent from the grouped counts, so they
# map to NaN; treat those as zero failures.
races_featurized['horse failure'] = (
    races_featurized['rid']
    .map(non_finisher_counts)
    .fillna(0)
    .astype('int64')
)

# Spot-check the vectorized counts against the row-wise reference function.
for race in races_featurized.head(3).to_dict('records'):
    assert race['horse failure'] == has_non_finishing_horse(race)

races_featurized[['rid', 'horse failure']].head(1)

Unnamed: 0,rid,horse failure
0,302858,1


In [10]:
# Inspect the runners of the first race to confirm its non-finisher count by eye.
horses_clean.loc[horses_clean['rid'].eq(302858)]

Unnamed: 0,rid,horseName,age,saddle,decimalPrice,isFav,trainerName,jockeyName,position,positionL,...,RPR,TR,OR,father,mother,gfather,weight,res_win,res_place,res_show
0,302858,Kings Return,6.0,4.0,0.6,1,W P Mullins,D J Casey,1,0,...,102.0,,,King's Ride,Browne's Return,Deep Run,73,1,1,0
1,302858,Majestic Red I,6.0,5.0,0.047619,0,John Hackett,Conor O'Dwyer,2,8,...,94.0,,,Long Pond,Courtlough Lady,Giolla Mear,73,0,1,0
2,302858,Clearly Canadian,6.0,2.0,0.166667,0,D T Hughes,G Cotter,3,1.5,...,92.0,,,Nordico,Over The Seas,North Summit,71,0,0,0
3,302858,Bernestic Wonder,8.0,1.0,0.058824,0,E McNamara,J Old Jones,4,dist,...,,,,Roselier,Miss Reindeer,Reindeer,73,0,0,0
4,302858,Beauty's Pride,5.0,6.0,0.038462,0,J J Lennon,T Martin,5,dist,...,,,,Noalto,Elena's Beauty,Tarqogan,66,0,0,0
5,302858,Graignamanagh,6.0,3.0,0.307692,0,Harry De Bromhead,J R Barry,40,30,...,,,,Tremblant,Feathermore,Crash Course,73,0,0,0


In [11]:
# Build the selection mask once so that `dfb` is guaranteed to be the exact
# complement of `dfa`; previously the same expression was written out twice.
# A race is selected when it falls in temperature levels 1-3, pressure levels
# 3-4, relative-humidity levels 1-3, and it is not raining.
# NOTE(review): presumably this is the "non-extreme" baseline and the
# complement is the "extreme weather" group — confirm against the level
# definitions used when the features were built.
baseline_mask = (
    races_featurized[['temp_level_1', 'temp_level_2', 'temp_level_3']].eq(1).any(axis=1)
    & races_featurized[['pressure_level_3', 'pressure_level_4']].eq(1).any(axis=1)
    & races_featurized[['rhum_level_1', 'rhum_level_2', 'rhum_level_3']].eq(1).any(axis=1)
    & races_featurized['is_raining'].eq(0)
)

dfa = races_featurized[baseline_mask]
dfb = races_featurized[~baseline_mask]

len(dfa), len(dfb)

(10520, 9681)

In [12]:
# Mean number of non-finishing horses per race in `dfa`.
dfa['horse failure'].mean()

0.14258555133079848

In [13]:
# Mean number of non-finishing horses per race in `dfb`.
dfb['horse failure'].mean()

0.1545294907550873

In [14]:
# Two-sample t-test on the per-race non-finisher counts of the two groups.
# equal_var=False selects Welch's test, which does not assume that both
# groups share the same variance.
stats.ttest_ind(
    dfa['horse failure'], dfb['horse failure'], equal_var=False
)

Ttest_indResult(statistic=-1.4591839139877067, pvalue=0.14453048233498583)

In [31]:
# Distribution of the per-race non-finisher count (how many races have 0, 1, 2, ... failures).
races_featurized['horse failure'].value_counts()

0     18267
1      1399
2       289
3       105
4        63
5        41
6        22
7         9
8         3
9         2
10        1
Name: horse failure, dtype: int64

In [32]:
# Races with more than 7 non-finishers, shown alongside their going code
# (`ncond`) and the weather readings.
report_columns = ['rid', 'datetime', 'title', 'ncond', 'horse failure', 'rain', 'temp', 'msl', 'rhum']
races_featurized.loc[races_featurized['horse failure'] > 7, report_columns]

Unnamed: 0,rid,datetime,title,ncond,horse failure,rain,temp,msl,rhum
259,60141,1991-02-28 14:30:00,New Stand Handicap Chase,9,9,0.0,7.3,1013.4,56
345,388925,1992-04-29 16:00:00,Sean Macklin Champion Hunters Chase,5,10,0.2,7.5,1018.6,84
14004,123429,2015-04-30 15:50:00,FBD Cross Country Chase for the La Touche Cup,1,8,0.0,10.5,1012.9,61
15643,123580,2017-04-07 18:05:00,Boolavogue Handicap Chase,5,9,0.0,12.7,1024.4,65
15803,145644,2017-05-19 19:10:00,Very Special Hunters Chase,1,8,0.0,10.8,1010.9,87
15877,155108,2017-06-07 16:45:00,Boolavogue Mares Maiden Hurdle,11,8,3.1,12.3,1006.9,96


The one with `rid = 155108` seems particularly interesting.

Its condition code (`ncond = 11`) means "yielding to soft".

Yielding: a turf course with a significant amount of 'give' to the ground due to recent rain

Soft: a turf course with a large amount of moisture. Horses sink very deeply into it

In [33]:
# List every runner in race 155108 to see which horses did not finish.
horses_clean.loc[horses_clean['rid'].eq(155108)]

Unnamed: 0,rid,horseName,age,saddle,decimalPrice,isFav,trainerName,jockeyName,position,positionL,...,RPR,TR,OR,father,mother,gfather,weight,res_win,res_place,res_show
159167,155108,Regal D'argent,5.0,10.0,0.047619,0,Colin Bowe,Paddy Kennedy,1,0.0,...,111.0,,,King's Theatre,Regle D'Or,Robin Des Champs,71,1,1,1
159168,155108,Delayed Eloquence,6.0,2.0,0.5,1,Joseph Patrick O'Brien,Davy Russell,2,0.75,...,110.0,,98.0,Beneficial,Famous Lady,Presenting,73,0,1,1
159169,155108,Something Sweet,5.0,13.0,0.058824,0,Roger Joseph McGrath,Phillip Enright,3,44.0,...,66.0,,,Mahler,Beg La Eile,Lahib,73,0,0,1
159170,155108,Rain In Spain,5.0,9.0,0.066667,0,Charles O'Brien,Jonathan Moore,4,2.0,...,64.0,,,Jeremy,Moonchild,Acatenango,73,0,0,0
159171,155108,Shantelle,8.0,11.0,0.014925,0,R Donohoe,Danny Mullins,5,3.0,...,61.0,,,Shantou,Glacial Pie,Glacial Storm,73,0,0,0
159172,155108,Superefficient,5.0,14.0,0.014925,0,Ms Michelle Duggan,B J Foley,6,36.0,...,25.0,,,Beneficial,Mellowthemoonlight,Un Desperado,69,0,0,0
159173,155108,Jazz Mine,7.0,5.0,0.047619,0,John Gerard Lonergan,Ben Dalton,40,30.0,...,,,,Presenting,In The Saltmine,Damister,73,0,0,0
159174,155108,Polly Manning,7.0,8.0,0.111111,0,David Harry Kelly,Sean Flanagan,40,30.0,...,,,,Milan,Stormy Breeze,Glacial Storm,73,0,0,0
159175,155108,Galloping Anger,5.0,4.0,0.029412,0,Matthew J Smith,Denis O'Regan,40,30.0,...,,,89.0,Makfi,Whispering Blues,Sadler's Wells,73,0,0,0
159176,155108,Bradystown,6.0,1.0,0.029412,0,Sean Byrne,Mark Bolger,40,30.0,...,,,,Mountain High,Mageney,Bravefoot,73,0,0,0


---