# `neural_network_race_full_swaps.ipynb`

### Author: Anthony Hein

#### Last updated: 12/4/2021

# Overview:

Use the neural network which has been trained to predict pairwise winners of a race to now resolve an entire race.

---

## Setup

In [1]:
import git
import os
import re
from typing import List
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
BASE_DIR = git.Repo(os.getcwd(), search_parent_directories=True).working_dir
BASE_DIR

'/Users/anthonyhein/Desktop/SML310/project'

---

## Load `horses_selected_trimmed_clean.csv`

In [3]:
horses_clean = pd.read_csv(f"{BASE_DIR}/data/streamline/horses_selected_trimmed_clean.csv", low_memory=False)
horses_clean.head()

Unnamed: 0,rid,horseName,age,saddle,decimalPrice,isFav,trainerName,jockeyName,position,positionL,...,RPR,TR,OR,father,mother,gfather,weight,res_win,res_place,res_show
0,302858,Kings Return,6.0,4.0,0.6,1,W P Mullins,D J Casey,1,0,...,102.0,,,King's Ride,Browne's Return,Deep Run,73,1,1,0
1,302858,Majestic Red I,6.0,5.0,0.047619,0,John Hackett,Conor O'Dwyer,2,8,...,94.0,,,Long Pond,Courtlough Lady,Giolla Mear,73,0,1,0
2,302858,Clearly Canadian,6.0,2.0,0.166667,0,D T Hughes,G Cotter,3,1.5,...,92.0,,,Nordico,Over The Seas,North Summit,71,0,0,0
3,302858,Bernestic Wonder,8.0,1.0,0.058824,0,E McNamara,J Old Jones,4,dist,...,,,,Roselier,Miss Reindeer,Reindeer,73,0,0,0
4,302858,Beauty's Pride,5.0,6.0,0.038462,0,J J Lennon,T Martin,5,dist,...,,,,Noalto,Elena's Beauty,Tarqogan,66,0,0,0


In [4]:
horses_clean = horses_clean[['rid', 'horseName', 'position', 'res_win', 'res_place', 'res_show']]

---

## Load `X_train_preprocess_without_race.csv`

In [5]:
X_train = pd.read_csv(f"{BASE_DIR}/data/analysis/X_train_preprocess_without_race.csv", low_memory=False)
X_train.head()

Unnamed: 0,horse1_age,horse1_saddle,horse1_decimalPrice,horse1_isFav,horse1_outHandicap,horse1_RPR,horse1_weight,horse1_jockey_d_last_race,horse1_jockey_d_first_race,horse1_jockey_prev_1_position,...,horse2_jockey_prev_3_position_rain,horse2_jockey_prev_1_finishing_time_ratio_rain,horse2_jockey_prev_2_finishing_time_ratio_rain,horse2_jockey_prev_3_finishing_time_ratio_rain,horse2_jockey_prev_1_position_rhum,horse2_jockey_prev_2_position_rhum,horse2_jockey_prev_3_position_rhum,horse2_jockey_prev_1_finishing_time_ratio_rhum,horse2_jockey_prev_2_finishing_time_ratio_rhum,horse2_jockey_prev_3_finishing_time_ratio_rhum
0,0.2,0.230769,0.19697,0.0,0.0,0.73125,0.692308,0.007631,0.236544,0.1,...,0.05,0.805104,0.780139,0.783438,0.1,0.025,0.025,0.805104,0.799642,0.780139
1,0.4,0.038462,0.072222,0.0,0.0,0.73125,0.74359,0.00505,0.236544,0.1,...,0.1,0.801416,0.781238,0.79416,0.075,0.05,0.075,0.801416,0.800487,0.785703
2,0.133333,0.423077,0.098485,0.0,0.0,0.65,0.705128,0.00074,0.237283,0.025,...,0.1,0.799909,0.780139,0.786816,0.025,0.1,0.025,0.799642,0.806486,0.780139
3,0.133333,0.423077,0.098485,0.0,0.0,0.65,0.705128,0.00074,0.237283,0.025,...,0.075,0.800176,0.781623,0.78187,0.05,0.075,0.05,0.801163,0.801416,0.780963
4,0.133333,0.192308,0.090278,0.0,0.0,0.6625,0.730769,0.00074,0.234655,0.05,...,0.1,0.799642,0.783601,0.785468,0.125,0.1,0.025,0.803191,0.805104,0.780139


In [6]:
X_train.shape

(800666, 144)

---

## Load `X_dev_preprocess_without_race.csv`

In [7]:
X_dev = pd.read_csv(f"{BASE_DIR}/data/analysis/X_dev_preprocess_without_race.csv", low_memory=False)
X_dev.head()

Unnamed: 0,horse1_age,horse1_saddle,horse1_decimalPrice,horse1_isFav,horse1_outHandicap,horse1_RPR,horse1_weight,horse1_jockey_d_last_race,horse1_jockey_d_first_race,horse1_jockey_prev_1_position,...,horse2_jockey_prev_3_position_rain,horse2_jockey_prev_1_finishing_time_ratio_rain,horse2_jockey_prev_2_finishing_time_ratio_rain,horse2_jockey_prev_3_finishing_time_ratio_rain,horse2_jockey_prev_1_position_rhum,horse2_jockey_prev_2_position_rhum,horse2_jockey_prev_3_position_rhum,horse2_jockey_prev_1_finishing_time_ratio_rhum,horse2_jockey_prev_2_finishing_time_ratio_rhum,horse2_jockey_prev_3_finishing_time_ratio_rhum
0,0.285714,0.105263,0.2,0.0,0.0,0.608392,0.769231,0.001516,0.629407,0.25,...,0.2,0.828972,0.837641,0.843255,0.075,0.05,0.275,0.834583,0.828972,0.853796
1,0.285714,0.210526,0.157143,0.0,0.0,0.258741,0.769231,0.000445,0.286087,0.075,...,0.075,0.826039,0.826039,0.842799,0.025,0.025,0.075,0.826039,0.826039,0.842799
2,0.285714,0.210526,0.157143,0.0,0.0,0.258741,0.769231,0.000445,0.286087,0.075,...,0.125,0.826413,0.841758,0.844367,0.125,0.05,0.275,0.843955,0.826413,0.841758
3,0.285714,0.210526,0.157143,0.0,0.0,0.258741,0.769231,0.000445,0.286087,0.075,...,0.05,0.832401,0.832114,0.828366,0.1,0.075,0.05,0.832401,0.837064,0.828519
4,0.285714,0.210526,0.157143,0.0,0.0,0.258741,0.769231,0.000445,0.286087,0.075,...,0.175,0.857255,0.835266,0.834946,0.15,0.025,0.2,0.857255,0.826039,0.837392


In [8]:
X_dev.shape

(228766, 144)

---

## Load `X_test_preprocess_without_race.csv`

In [9]:
X_test = pd.read_csv(f"{BASE_DIR}/data/analysis/X_test_preprocess_without_race.csv", low_memory=False)
X_test.head()

Unnamed: 0,horse1_age,horse1_saddle,horse1_decimalPrice,horse1_isFav,horse1_outHandicap,horse1_RPR,horse1_weight,horse1_jockey_d_last_race,horse1_jockey_d_first_race,horse1_jockey_prev_1_position,...,horse2_jockey_prev_3_position_rain,horse2_jockey_prev_1_finishing_time_ratio_rain,horse2_jockey_prev_2_finishing_time_ratio_rain,horse2_jockey_prev_3_finishing_time_ratio_rain,horse2_jockey_prev_1_position_rhum,horse2_jockey_prev_2_position_rhum,horse2_jockey_prev_3_position_rhum,horse2_jockey_prev_1_finishing_time_ratio_rhum,horse2_jockey_prev_2_finishing_time_ratio_rhum,horse2_jockey_prev_3_finishing_time_ratio_rhum
0,0.230769,0.333333,0.03268,0.0,0.0,0.344828,0.74359,0.001053,0.49501,0.225,...,0.2,0.865544,0.847264,0.853154,0.025,0.2,0.3,0.840533,0.850856,0.864844
1,0.230769,0.333333,0.03268,0.0,0.0,0.344828,0.74359,0.001053,0.49501,0.225,...,0.15,0.864781,0.840533,0.855519,0.025,0.025,0.025,0.840533,0.840533,0.840533
2,0.230769,0.333333,0.03268,0.0,0.0,0.344828,0.74359,0.001053,0.49501,0.225,...,0.075,0.838534,0.843337,0.846422,0.2,0.1,0.125,0.853412,0.842583,0.847859
3,0.230769,0.333333,0.03268,0.0,0.0,0.344828,0.74359,0.001053,0.49501,0.225,...,0.25,0.851685,0.849673,0.854624,0.15,0.25,0.2,0.849673,0.854624,0.858051
4,0.230769,0.333333,0.03268,0.0,0.0,0.344828,0.74359,0.001053,0.49501,0.225,...,0.25,0.847892,0.842776,0.862423,0.175,0.075,0.275,0.853634,0.844529,0.862845


In [10]:
X_test.shape

(114392, 144)

---

## Load `races_selected_trimmed_clean.csv`

In [11]:
races_clean = pd.read_csv(f"{BASE_DIR}/data/streamline/races_selected_trimmed_clean.csv", low_memory=False)
races_clean.head()

Unnamed: 0,rid,course,title,winningTime,metric,ncond,class,runners,margin,1st_place_rank_in_odds,...,station name,station lat,station lng,dist to station,station reading date,temp,msl,rain,rhum,station reading timedelta
0,302858,Thurles,Liffey Maiden Hurdle (Div 1),277.2,3821.0,1,0,6,1.219263,1,...,BIRR,53.0525,-7.5325,45.288813,1/9/97 12:00,1.6,1012.4,0.0,87,15.0
1,291347,Punchestown,Ericsson G.S.M. Grand National Trial Handicap ...,447.2,5229.0,5,0,9,1.218049,4,...,CASEMENT,53.182,-6.262,24.477602,2/16/97 15:00,8.0,992.5,0.4,87,20.0
2,75447,Listowel,Ballybunion E.B.F. Beginners S'chase,318.4,3620.0,5,0,8,1.27732,3,...,SHANNON AIRPORT,52.4125,-8.5505,63.534139,3/1/97 14:00,12.0,1003.5,0.0,73,0.0
3,358038,Punchestown,Quinns Of Baltinglass Chase (La Touche) (Cross...,533.9,6637.0,1,0,10,1.286595,1,...,CASEMENT,53.182,-6.262,24.477602,4/24/97 14:00,12.6,1011.9,0.0,72,20.0
4,89211,Tipperary,Topaz Sprint Stakes (Listed),59.9,1005.0,4,0,5,1.217043,4,...,SHANNON AIRPORT,52.4125,-8.5505,25.222137,5/8/97 17:00,11.1,994.2,0.0,59,30.0


---

## Load Identifying Info

In [12]:
X_train_identifiers = pd.read_csv(f"{BASE_DIR}/data/analysis/X_train_identifiers.csv", low_memory=False)
X_train_identifiers.head()

Unnamed: 0,rid,horse1_horseName,horse2_horseName
0,341451,Dance Design,Idris
1,341451,Idris,Dance Design
2,50025,Azra,Johan Cruyff
3,50025,Azra,Beautiful Fire
4,50025,Johan Cruyff,Azra


In [13]:
X_dev_identifiers = pd.read_csv(f"{BASE_DIR}/data/analysis/X_dev_identifiers.csv", low_memory=False)
X_dev_identifiers.head()

Unnamed: 0,rid,horse1_horseName,horse2_horseName
0,159686,Gussy Goose,Ibergman
1,159686,Mothers Finest,Emergent
2,159686,Mothers Finest,Gussy Goose
3,159686,Mothers Finest,Just Joan
4,159686,Mothers Finest,Rayisa


In [14]:
X_test_identifiers = pd.read_csv(f"{BASE_DIR}/data/analysis/X_test_identifiers.csv", low_memory=False)
X_test_identifiers.head()

Unnamed: 0,rid,horse1_horseName,horse2_horseName
0,136782,Sestriere,Associate Rock
1,136782,Sestriere,Fridtjof Nansen
2,136782,Sestriere,Pillar
3,136782,Sestriere,Captain Power
4,136782,Sestriere,Frow


---

## Find Races w/ All Runners Present

The correctness of this function is demonstrated elsewhere.

In [15]:
def runners_to_num_pairs(runners: int) -> int:
    """
    Return how many ordered (horse1, horse2) pairs of distinct
    runners exist for a field of `runners` horses, i.e. n * (n - 1).
    """
    # Every runner is paired once with each of the other runners.
    opponents_per_runner = runners - 1
    return opponents_per_runner * runners

In [16]:
def find_races_all_runners_present(df: pd.core.frame.DataFrame) -> List[int]:
    """
    Return the race ids in `df` for which every ordered pair of runners is present.

    `df` is an identifiers frame with one row per (rid, horse1_horseName,
    horse2_horseName) pair. A race is kept when its number of rows equals
    runners * (runners - 1), where `runners` comes from the notebook-level
    `races_clean` frame.
    """
    # Non-null row count per race; after count() every column holds that tally.
    pair_counts = df.groupby("rid").count()

    # Attach the declared field size of each race.
    with_runners = pair_counts.merge(races_clean[['rid', 'runners']], on='rid')
    with_runners['permutations'] = with_runners['runners'].map(runners_to_num_pairs)

    # Keep races whose observed pair count matches the expected number of ordered pairs.
    is_complete = with_runners['permutations'] == with_runners['horse1_horseName']
    return list(with_runners[is_complete]['rid'])

In [17]:
X_train_all_runners_present = find_races_all_runners_present(X_train_identifiers)
len(X_train_all_runners_present)

3212

In [18]:
X_dev_all_runners_present = find_races_all_runners_present(X_dev_identifiers)
len(X_dev_all_runners_present)

1012

In [19]:
X_test_all_runners_present = find_races_all_runners_present(X_test_identifiers)
len(X_test_all_runners_present)

360

---

## Load Neural Network

In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# Pick the device tensors are moved to further down: the first CUDA GPU
# when PyTorch reports one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Seed both the PyTorch and NumPy global random number generators so that
# any randomness drawn from them is reproducible across runs.
torch.manual_seed(0)
np.random.seed(0)

In [21]:
# Attach (rid, horse1_horseName, horse2_horseName) to each feature row.
# DataFrame.join aligns on the index, so row i of the features is paired with row i
# of the identifiers.
# NOTE(review): assumes each features file and its identifiers file were written in
# the same row order and with the same length -- confirm against the preprocessing step.
X_train_plus_identifiers = X_train.join(X_train_identifiers)
X_dev_plus_identifiers = X_dev.join(X_dev_identifiers)
X_test_plus_identifiers = X_test.join(X_test_identifiers)

In [22]:
# Keep only the pairwise rows that belong to races in which every runner is present.
# The explicit .copy() makes each result an independent frame: a column is added to
# these frames later in the notebook, and writing into a boolean-mask slice would
# otherwise raise SettingWithCopyWarning.
X_train_plus_identifiers = X_train_plus_identifiers[X_train_plus_identifiers['rid'].isin(X_train_all_runners_present)].copy()
X_dev_plus_identifiers = X_dev_plus_identifiers[X_dev_plus_identifiers['rid'].isin(X_dev_all_runners_present)].copy()
X_test_plus_identifiers = X_test_plus_identifiers[X_test_plus_identifiers['rid'].isin(X_test_all_runners_present)].copy()

In [23]:
X_train.shape

(800666, 144)

In [24]:
X_train_plus_identifiers.shape

(239188, 147)

In [25]:
X_train_plus_identifiers.head()

Unnamed: 0,horse1_age,horse1_saddle,horse1_decimalPrice,horse1_isFav,horse1_outHandicap,horse1_RPR,horse1_weight,horse1_jockey_d_last_race,horse1_jockey_d_first_race,horse1_jockey_prev_1_position,...,horse2_jockey_prev_3_finishing_time_ratio_rain,horse2_jockey_prev_1_position_rhum,horse2_jockey_prev_2_position_rhum,horse2_jockey_prev_3_position_rhum,horse2_jockey_prev_1_finishing_time_ratio_rhum,horse2_jockey_prev_2_finishing_time_ratio_rhum,horse2_jockey_prev_3_finishing_time_ratio_rhum,rid,horse1_horseName,horse2_horseName
46,0.2,0.115385,0.098485,0.0,0.0,0.29375,0.692308,8e-06,0.261505,0.1,...,0.780139,0.025,0.025,0.025,0.799642,0.799642,0.780139,337765,Plaza De Toros,Mingling Glances
47,0.2,0.115385,0.098485,0.0,0.0,0.29375,0.692308,8e-06,0.261505,0.1,...,0.788387,1.0,0.075,0.05,0.868119,0.80254,0.784381,337765,Plaza De Toros,Johan Cruyff
48,0.2,0.076923,0.541667,1.0,0.0,0.64375,0.692308,8e-06,0.235628,0.05,...,0.788387,1.0,0.075,0.05,0.868119,0.80254,0.784381,337765,Olympic Majesty,Johan Cruyff
49,0.2,0.038462,0.12037,0.0,0.0,0.6875,0.692308,0.00034,0.264441,0.05,...,0.785794,0.05,0.075,0.075,0.803869,0.814859,0.781684,337765,Johan Cruyff,Olympic Majesty
50,0.2,0.038462,0.12037,0.0,0.0,0.6875,0.692308,0.00034,0.264441,0.05,...,0.780139,0.025,0.025,0.025,0.799642,0.799642,0.780139,337765,Johan Cruyff,Mingling Glances


In [26]:
# Identifier columns are bookkeeping only; they are removed before the frames
# are converted to tensors.
drop_cols = ['rid', 'horse1_horseName', 'horse2_horseName']

X_train_only_all_runners_present, X_dev_only_all_runners_present, X_test_only_all_runners_present = (
    frame.drop(columns=drop_cols)
    for frame in (X_train_plus_identifiers, X_dev_plus_identifiers, X_test_plus_identifiers)
)

In [30]:
# Convert each feature frame to a float32 tensor on the selected device.
X_train_tensor, X_dev_tensor, X_test_tensor = (
    torch.from_numpy(frame.to_numpy()).float().to(device)
    for frame in (
        X_train_only_all_runners_present,
        X_dev_only_all_runners_present,
        X_test_only_all_runners_present,
    )
)

In [31]:
class Net(nn.Module):
    """
    Fully connected network with ReLU hidden layers and a single linear output unit.

    The output is a raw logit; no sigmoid is applied inside the network.

    Note: the number of hidden layers is max(1, num_layers - 1), so num_layers=1
    and num_layers=2 both build exactly one hidden layer. This is kept as-is so
    that checkpoints saved with this definition still load.
    """

    def __init__(self, input_size, num_layers, layers_size):
        super().__init__()
        # First hidden layer maps the input features to the hidden width.
        stack = [nn.Linear(input_size, layers_size)]
        # Any further hidden layers are square (layers_size -> layers_size).
        for _ in range(num_layers - 2):
            stack.append(nn.Linear(layers_size, layers_size))
        self.hidden_layers = nn.ModuleList(stack)
        self.output = nn.Linear(layers_size, 1)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Apply each hidden layer followed by ReLU, then the linear output layer."""
        hidden = x
        for linear in self.hidden_layers:
            hidden = linear(hidden)
            hidden = self.activation(hidden)
        return self.output(hidden)

In [33]:
# Rebuild the network with the layer shapes stored in the checkpoint
# (load_state_dict is strict by default, so the shapes must match exactly).
net = Net(X_train_tensor.shape[1], 1, 150)

# map_location remaps the stored tensors onto the active device, so a checkpoint
# saved on a GPU still loads on a CPU-only machine.
# NOTE(review): torch.load unpickles the file -- only load checkpoints produced by this project.
net.load_state_dict(
    torch.load(f"{BASE_DIR}/data/analysis/neural_network/net_all_features_150_nodes.bin", map_location=device)
)

# The input tensors were moved to `device`; the model has to live there too,
# otherwise the forward pass fails with a device mismatch when CUDA is available.
net.to(device)
net.eval()

Net(
  (hidden_layers): ModuleList(
    (0): Linear(in_features=144, out_features=150, bias=True)
  )
  (output): Linear(in_features=150, out_features=1, bias=True)
  (activation): ReLU()
)

## Compute Predictions

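Each horse is scored by summing its predicted win probabilities over all of its pairwise matchups in the race; the predicted finishing order ranks the horses by this score, highest first.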

In [34]:
from collections import defaultdict

In [35]:
# Inference only, so gradient tracking is disabled.
# The module is called directly (net(x)) rather than through net.forward(x) so that
# nn.Module.__call__ runs, including any registered hooks.
# The network outputs logits; the sigmoid maps them into (0, 1).
with torch.no_grad():
    train_preds = torch.sigmoid(net(X_train_tensor))
    dev_preds = torch.sigmoid(net(X_dev_tensor))
    test_preds = torch.sigmoid(net(X_test_tensor))

In [36]:
# Attach the predictions as a plain 1-D NumPy column.
#  - .detach().cpu() makes the conversion work when the tensors live on a GPU;
#  - .ravel() flattens the (N, 1) network output to length N;
#  - .assign returns a new frame instead of writing into a filtered slice, which
#    avoids SettingWithCopyWarning and keeps the cell safe to re-run.
X_train_plus_identifiers = X_train_plus_identifiers.assign(predictions=train_preds.detach().cpu().numpy().ravel())
X_dev_plus_identifiers = X_dev_plus_identifiers.assign(predictions=dev_preds.detach().cpu().numpy().ravel())
X_test_plus_identifiers = X_test_plus_identifiers.assign(predictions=test_preds.detach().cpu().numpy().ravel())

In [37]:
X_train_plus_identifiers[['rid', 'horse1_horseName', 'horse2_horseName', 'predictions']].head()

Unnamed: 0,rid,horse1_horseName,horse2_horseName,predictions
46,337765,Plaza De Toros,Mingling Glances,2.578393e-07
47,337765,Plaza De Toros,Johan Cruyff,2.090218e-08
48,337765,Olympic Majesty,Johan Cruyff,0.1747337
49,337765,Johan Cruyff,Olympic Majesty,0.936053
50,337765,Johan Cruyff,Mingling Glances,0.9993711


In [38]:
# Per-race score table: rid -> {horse name -> summed pairwise score}.
# One (initially empty) table is created for every race with all runners present.
X_train_predictions = {rid: defaultdict(lambda: 0) for rid in X_train_all_runners_present}

# Single pass over the pairwise rows instead of re-filtering the whole frame once
# per race and walking it with iterrows. Rows are visited in their original order,
# so each horse's score is accumulated in the same order as before.
for rid, horse1, horse2, pred in zip(
    X_train_plus_identifiers['rid'],
    X_train_plus_identifiers['horse1_horseName'],
    X_train_plus_identifiers['horse2_horseName'],
    X_train_plus_identifiers['predictions'],
):
    race_d = X_train_predictions.get(rid)
    if race_d is None:
        # Row belongs to a race that is not in the all-runners-present list.
        continue
    # The prediction is added to horse1's score and its complement to horse2's.
    race_d[horse1] += pred
    race_d[horse2] += 1 - pred

In [39]:
# Per-race score table: rid -> {horse name -> summed pairwise score}.
# One (initially empty) table is created for every race with all runners present.
X_dev_predictions = {rid: defaultdict(lambda: 0) for rid in X_dev_all_runners_present}

# Single pass over the pairwise rows instead of re-filtering the whole frame once
# per race and walking it with iterrows. Rows are visited in their original order,
# so each horse's score is accumulated in the same order as before.
for rid, horse1, horse2, pred in zip(
    X_dev_plus_identifiers['rid'],
    X_dev_plus_identifiers['horse1_horseName'],
    X_dev_plus_identifiers['horse2_horseName'],
    X_dev_plus_identifiers['predictions'],
):
    race_d = X_dev_predictions.get(rid)
    if race_d is None:
        # Row belongs to a race that is not in the all-runners-present list.
        continue
    # The prediction is added to horse1's score and its complement to horse2's.
    race_d[horse1] += pred
    race_d[horse2] += 1 - pred

In [40]:
# Per-race score table: rid -> {horse name -> summed pairwise score}.
# One (initially empty) table is created for every race with all runners present.
X_test_predictions = {rid: defaultdict(lambda: 0) for rid in X_test_all_runners_present}

# Single pass over the pairwise rows instead of re-filtering the whole frame once
# per race and walking it with iterrows. Rows are visited in their original order,
# so each horse's score is accumulated in the same order as before.
for rid, horse1, horse2, pred in zip(
    X_test_plus_identifiers['rid'],
    X_test_plus_identifiers['horse1_horseName'],
    X_test_plus_identifiers['horse2_horseName'],
    X_test_plus_identifiers['predictions'],
):
    race_d = X_test_predictions.get(rid)
    if race_d is None:
        # Row belongs to a race that is not in the all-runners-present list.
        continue
    # The prediction is added to horse1's score and its complement to horse2's.
    race_d[horse1] += pred
    race_d[horse2] += 1 - pred

---

## Evaluate Performance

We need a helper function that measures, averaged over the runners in a race, how many positions each runner's predicted finishing position is away from its actual finishing position.

In [61]:
def average_distance_to_correct_position(golds: List[str], preds: List[str]) -> float:
    """
    Mean absolute difference between predicted and actual finishing positions.

    Parameters
    ----------
    golds : horse names in their actual finishing order.
    preds : horse names in their predicted finishing order.

    Returns
    -------
    The sum over `preds` of |predicted index - actual index|, divided by
    len(golds). Returns 0.0 when `golds` is empty.

    Raises
    ------
    ValueError if a name in `preds` does not appear in `golds`.
    """
    # Build the name -> actual position lookup once instead of calling
    # golds.index() for every prediction. setdefault keeps the first occurrence
    # of a repeated name, which matches list.index().
    gold_position = {}
    for position, horse_name in enumerate(golds):
        gold_position.setdefault(horse_name, position)

    total_distance = 0
    for position, horse_name in enumerate(preds):
        if horse_name not in gold_position:
            # Same exception type that list.index() raised here.
            raise ValueError(f"{horse_name!r} is not in list")
        total_distance += abs(position - gold_position[horse_name])

    # Guard the division so an empty race gives 0.0 rather than ZeroDivisionError.
    return total_distance / len(golds) if golds else 0.0

In [47]:
rid = X_train_all_runners_present[0]
rid

330

In [48]:
horses_clean[horses_clean['rid'] == rid].sort_values('position')

Unnamed: 0,rid,horseName,position,res_win,res_place,res_show
69048,330,Gaudeamus,1,1,1,1
69049,330,The Real Thing,2,0,1,1
69050,330,Simonetta,3,0,0,1
69051,330,Pelican Waters,4,0,0,0
69052,330,Liscanna,5,0,0,0
69053,330,Evening Rushour,6,0,0,0
69054,330,Miss Beatrix,7,0,0,0
69055,330,Moverra,8,0,0,0


In [49]:
list(horses_clean[horses_clean['rid'] == rid].sort_values('position')['horseName'])

['Gaudeamus',
 'The Real Thing',
 'Simonetta',
 'Pelican Waters',
 'Liscanna',
 'Evening Rushour',
 'Miss Beatrix',
 'Moverra']

In [50]:
X_train_predictions[rid]

defaultdict(<function __main__.<lambda>()>,
            {'Miss Beatrix': 2.1984984621522017,
             'Gaudeamus': 13.400523335521939,
             'Pelican Waters': 9.596822452891502,
             'Liscanna': 5.6697241982328705,
             'Simonetta': 9.03593014631042,
             'Evening Rushour': 4.510214005771559,
             'Moverra': 0.01298514275731577,
             'The Real Thing': 11.575302256362193})

In [51]:
d_race = X_train_predictions[rid]
sorted(list(d_race.keys()), key=lambda x: d_race[x], reverse=True)

['Gaudeamus',
 'The Real Thing',
 'Pelican Waters',
 'Simonetta',
 'Liscanna',
 'Evening Rushour',
 'Miss Beatrix',
 'Moverra']

In [62]:
average_distance_to_correct_position(
    ['Gaudeamus', 'The Real Thing', 'Simonetta', 'Pelican Waters', 'Liscanna', 'Evening Rushour', 'Miss Beatrix', 'Moverra'],
    ['Gaudeamus', 'The Real Thing', 'Pelican Waters', 'Simonetta', 'Liscanna', 'Evening Rushour', 'Miss Beatrix', 'Moverra']
)

0.25

In [64]:
average_distance_to_correct_position(
    ['Gaudeamus', 'The Real Thing', 'Simonetta', 'Pelican Waters'],
    ['Gaudeamus', 'The Real Thing', 'Pelican Waters', 'Simonetta']
)

0.5

In [65]:
average_distance_to_correct_position(
    ['Gaudeamus', 'Miss Beatrix', 'The Real Thing', 'Simonetta', 'Pelican Waters', 'Evening Rushour', 'Liscanna', 'Moverra'],
    ['Gaudeamus', 'Pelican Waters', 'Simonetta', 'Liscanna', 'Evening Rushour', 'Miss Beatrix', 'Moverra', 'The Real Thing']
)

2.25

In [66]:
def _position_errors(rids, predictions):
    """
    Return, for each race id in `rids` (in order), the average distance between
    the predicted and the actual finishing positions.

    `predictions` maps rid -> {horse name -> score}; horses are ranked by score,
    highest first. The actual order comes from `horses_clean`, sorted by 'position'.
    """
    # Split the relevant rows of horses_clean by race once, instead of scanning
    # the whole frame with a boolean mask for every race.
    relevant_rows = horses_clean[horses_clean['rid'].isin(rids)]
    rows_by_race = {race_id: rows for race_id, rows in relevant_rows.groupby('rid')}

    errors = []
    for rid in rids:
        actual_order = list(rows_by_race[rid].sort_values('position')['horseName'])

        scores = predictions[rid]
        predicted_order = sorted(scores, key=scores.get, reverse=True)

        errors.append(average_distance_to_correct_position(actual_order, predicted_order))
    return errors


# NOTE: despite the *_acc names, these lists hold per-race position *errors*
# (lower is better); the names are kept because later cells refer to them.
train_acc = _position_errors(X_train_all_runners_present, X_train_predictions)
dev_acc = _position_errors(X_dev_all_runners_present, X_dev_predictions)
test_acc = _position_errors(X_test_all_runners_present, X_test_predictions)

In [67]:
np.mean(train_acc)

0.32323223856536437

In [68]:
np.mean(dev_acc)

0.3786664312850083

In [69]:
np.mean(test_acc)

0.428461091377758

In [70]:
rid = X_test_all_runners_present[0]
horses_clean[horses_clean['rid'] == rid].sort_values('position')

Unnamed: 0,rid,horseName,position,res_win,res_place,res_show
20723,1163,Southern France,1,1,1,0
20724,1163,Downdraft,2,0,1,0
20725,1163,Master Of Reality,3,0,0,0
20726,1163,Capri,4,0,0,0
20727,1163,Cimeara,5,0,0,0
20728,1163,Eminent Authority,6,0,0,0


In [71]:
X_test_predictions[rid]

defaultdict(<function __main__.<lambda>()>,
            {'Downdraft': 6.784536622202722,
             'Capri': 6.004254263229086,
             'Southern France': 9.623282447406382,
             'Master Of Reality': 5.563652929151431,
             'Eminent Authority': 0.05549500449706102,
             'Cimeara': 1.968778733513318})

In [72]:
test_acc[0]

0.3333333333333333

---