# Import Packages needed for model development

The main packages that will be used are pandas, tensorflow and torch.

In [None]:
import pandas as pd
from datetime import datetime
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
import random
from torch.utils.data import DataLoader, TensorDataset

## Neural Network Model Setup
We have chosen to use a neural network to develop a model that can be used to predict the finish position. To do this, we stack multiple linear (fully connected) layers.

In [None]:
class HorsePredictor(nn.Module):
    """Feed-forward network mapping 27 input features to 20 output scores.

    The forward pass chains eight ``nn.Linear`` layers
    (27 -> 64 -> 128 -> 64 -> 32 -> 16 -> 8 -> 32 -> 20) with no activation
    applied between them. The ``relu`` and ``sigmoid`` modules are created
    but are not used by ``forward``.

    The attribute names ``fc1`` .. ``fc8`` determine the state-dict keys, so
    they must not be renamed if saved weights are to be loaded.
    """

    def __init__(self):
        super().__init__()
        # Modules are created in the same order as before so that module
        # registration order and seeded weight initialisation are unchanged.
        self.fc1 = nn.Linear(in_features=27, out_features=64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=64, out_features=128)
        self.sigmoid = nn.Sigmoid()
        self.fc3 = nn.Linear(in_features=128, out_features=64)
        self.fc4 = nn.Linear(in_features=64, out_features=32)
        self.fc5 = nn.Linear(in_features=32, out_features=16)
        self.fc6 = nn.Linear(in_features=16, out_features=8)
        self.fc7 = nn.Linear(in_features=8, out_features=32)
        self.fc8 = nn.Linear(in_features=32, out_features=20)

    def forward(self, x):
        """Pass ``x`` through ``fc1`` .. ``fc8`` in order and return the result."""
        linear_stack = (
            self.fc1, self.fc2, self.fc3, self.fc4,
            self.fc5, self.fc6, self.fc7, self.fc8,
        )
        for layer in linear_stack:
            x = layer(x)
        return x


# Prediction on the custom data
For this section, aside from the requirement of splitting the training and testing data accordingly, we have decided to split the data arbitrarily so that we can see how our model performs when the dataset changes. Any user interested in improving our model is welcome to upload their own data and validate the results.

In [None]:
# Load the custom data set from CSV, parsed with the pyarrow engine.
custom_df = pd.read_csv(filepath_or_buffer='custom_data.csv', engine='pyarrow')

We first load the data, and then drop any columns that we believe will not be helpful for our prediction.

In [None]:
# Remove, in a single call, every column judged unhelpful for the prediction.
# The last entry ("") is the column whose header is an empty string.
custom_df = custom_df.drop(
    columns=[
        "AgeRestriction",
        "ClassRestriction",
        "Disqualified",
        "FoalingCountry",
        "FoalingDate",
        "Gender",
        "GoingAbbrev",
        "RaceStartTime",
        "RacingSubType",
        "StartType",
        "CourseIndicator",
        "Surface",
        "HandicapType",
        "RaceGroup",
        "SexRestriction",
        "",
    ]
)

In [None]:
# Coerce every column to numeric (entries that cannot be parsed become NaN),
# discard rows that contain any NaN, then cast the whole frame to float.
custom_df = (
    custom_df
    .apply(pd.to_numeric, errors='coerce')
    .dropna()
    .astype(float)
)

Now we will build a list of DataFrames, one per unique RaceID, so that each race can be used as a batch. We chose to do this because we think it is best to predict based on the information provided for each race. One could instead use fixed-size batches directly for simplicity.

In [None]:
# Build a list with one DataFrame per race.
#
# groupby partitions the rows in a single pass instead of re-filtering the
# whole frame once per RaceID. sort=False yields the races in order of first
# appearance (the order .unique() produced), and groupby keeps the original
# row order inside each group.
custom_dfss = [race_df for _, race_df in custom_df.groupby('RaceID', sort=False)]

In [None]:
# For each race, the feature frame is every column except 'FinishPosition',
# and the label series is the 'FinishPosition' column on its own.
custom_df_features = [race_df.drop('FinishPosition', axis=1) for race_df in custom_dfss]
custom_df_labels = [race_df['FinishPosition'] for race_df in custom_dfss]

In [None]:
# Convert each race's features to a float32 tensor and its labels to a
# long (integer) tensor, keeping the two lists aligned race by race.
custom_feature_tensors = []
custom_label_tensors = []
for race_features, race_labels in zip(custom_df_features, custom_df_labels):
    custom_feature_tensors.append(torch.tensor(race_features.values, dtype=torch.float32))
    custom_label_tensors.append(torch.tensor(race_labels.values, dtype=torch.long))

We have built the features and labels and created the tensors needed by the model. Next, we load the trained model and run it to see our results.

In [None]:
model = HorsePredictor()
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True where the installed torch
# version supports it).
# map_location='cpu' lets a checkpoint that was saved from a GPU session load
# on a CPU-only machine; the tensors in this notebook are created on the CPU.
model.load_state_dict(torch.load('trot_model_state_dict.pth', map_location='cpu'))

<All keys matched successfully>

Now it is time to evaluate the model. We will run it on the trot horse data, just with a different split.

In [None]:
model.eval()  # Set the model to evaluation mode

# One NumPy array of predicted class indices per race.
custom_lst = []

with torch.no_grad():  # Gradients are not needed for inference
    # Only the feature tensors are needed here; the labels are not used for
    # prediction, so they are no longer iterated alongside.
    for inputs in custom_feature_tensors:
        outputs = model(inputs)
        # For every row, take the index of the largest model output.
        predicted = torch.argmax(outputs, dim=1)
        custom_lst.append(predicted.numpy())


In [None]:
# Turn each race's predicted indices into normalised inverse-rank weights:
# weight = 1 / predicted index, divided by the race total so they sum to 1.
custom_lsst = []
for pre in custom_lst:
    positions = pre.astype(float)
    # argmax can return index 0; 1 / 0 is infinite and would turn the
    # normalised values for the whole race into NaN/0. Such entries get a
    # weight of 0 instead.
    # NOTE(review): assumes index 0 is not a valid finish position — confirm
    # against the labels used for training.
    weights = np.divide(
        1.0,
        positions,
        out=np.zeros_like(positions),
        where=positions > 0,
    )
    total = weights.sum()
    if total > 0:
        pr = weights / total
    else:
        # No horse in the race has a usable predicted index: fall back to a
        # uniform distribution rather than dividing by zero.
        pr = np.full_like(weights, 1.0 / max(len(weights), 1))
    custom_lsst.append(pr.reshape(-1, 1))

In [None]:
# Flatten the per-race pieces back into two arrays with one row per horse:
# the feature matrix and the single-column array of normalised weights.
custom_features_numpy = torch.cat(custom_feature_tensors).numpy()
predicted_custom_labels = np.concatenate(custom_lsst, axis=0)

In [None]:
# Append the predicted column to the right of the feature matrix.
predicted_custom_with_labels = np.hstack((custom_features_numpy, predicted_custom_labels))

In [None]:
# Wrap the combined array in a DataFrame; columns are named in a later cell.
predicted_custom_df = pd.DataFrame(data=predicted_custom_with_labels)

In [None]:
# Name the columns from the cleaned input frame instead of a hard-coded list.
# The feature matrix was built from custom_df with 'FinishPosition' removed,
# so its column order is exactly custom_df's remaining columns; deriving the
# names keeps them correct if the input file's column order ever differs.
# The extra last column holds the normalised prediction.
predicted_custom_df.columns = [
    *custom_df.columns.drop('FinishPosition'),
    'win_probability',
]

In [None]:
# Keep only the identifiers and the predicted probability for the report.
predicted_result = predicted_custom_df.loc[:, ['HorseID', 'RaceID', 'win_probability']]

In [None]:
# Show the HorseID / RaceID / win_probability table as the cell output.
predicted_result

Unnamed: 0,HorseID,RaceID,win_probability
0,1545779.0,1662903.0,0.278330
1,1545897.0,1662903.0,0.139165
2,1547175.0,1662903.0,0.092777
3,1549945.0,1662903.0,0.069583
4,1548420.0,1662903.0,0.055666
...,...,...,...
88184,1563119.0,1666241.0,0.096419
88185,1563782.0,1666241.0,0.385675
88186,1563560.0,1666241.0,0.077135
88187,1564835.0,1666241.0,0.064279


In [None]:
# Write the result table to CSV without the DataFrame index column.
predicted_result.to_csv(path_or_buf="predicted_result.csv", index=False)