In [4]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.linear_model import Ridge
from scipy import stats
import pickle
from preprocessing import clean_df
import warnings

# Columns that the "NoOdds" models are not given; they are dropped from the
# race frame before those models predict.
oddsRelatedCols = [
    "DecOdds",
    "Market",
    "OddsPos",
    "MinOdds",
    "AvgOdds",
    "MaxOdds",
    "RankFrac",
    "Vibes",
    "RedShoes",
]

#functions to predict using the models
def classifierPredict(race):
    """Pick the runner the classifier rates most likely to finish first.

    The model is unpickled from ``classifierFull.obj`` on every call and a
    copy of ``race`` is scored with ``predict_proba``.

    Returns the position (row number within ``race``) of the runner with the
    highest value in column 1 of the probability output.
    """
    features = race.copy()  # work on a copy so the caller's frame is never modified
    # NOTE: pickle.load can execute arbitrary code; only load trusted model files.
    with open("classifierFull.obj", "rb") as modelFile:
        model = pickle.load(modelFile)
    classProbabilities = model.predict_proba(features)
    winColumn = classProbabilities[:, 1]  # column 1 is used as the chance of finishing first
    return winColumn.argmax()

def regFullPredict(race):
    """Pick the expected winner using the model stored in ``XGBRegressorFull.obj``.

    The model is unpickled on every call and run on a copy of ``race`` with
    all of its columns.

    Returns the position (row number within ``race``) of the runner with the
    lowest predicted value (presumably the predicted finishing position, so
    lowest means best).
    """
    features = race.copy()  # keep the caller's frame untouched
    # NOTE: pickle.load can execute arbitrary code; only load trusted model files.
    with open("XGBRegressorFull.obj", "rb") as modelFile:
        model = pickle.load(modelFile)
    predictedValues = model.predict(features)
    return predictedValues.argmin()

def regNoOddsPredict(race):
    """Pick the expected winner using the model stored in ``XGBRegressorNoOdds.obj``.

    The columns listed in ``oddsRelatedCols`` are removed from a copy of
    ``race`` before predicting; the model is unpickled on every call.

    Returns the position (row number within ``race``) of the runner with the
    lowest predicted value (presumably the predicted finishing position, so
    lowest means best).
    """
    # Copy first, then strip the odds columns this model is not given.
    features = race.copy().drop(columns=oddsRelatedCols)
    # NOTE: pickle.load can execute arbitrary code; only load trusted model files.
    with open("XGBRegressorNoOdds.obj", "rb") as modelFile:
        model = pickle.load(modelFile)
    predictedValues = model.predict(features)
    return predictedValues.argmin()

def ridgeNoOddsPredict(race):
    """Pick the expected winner using the model stored in ``ridgeNoOdds.obj``.

    The columns listed in ``oddsRelatedCols`` are removed from a copy of
    ``race`` before predicting; the model is unpickled on every call.

    Returns the position (row number within ``race``) of the runner with the
    lowest predicted value (presumably the predicted finishing position, so
    lowest means best).
    """
    # Copy first, then strip the odds columns this model is not given.
    features = race.copy().drop(columns=oddsRelatedCols)
    # NOTE: pickle.load can execute arbitrary code; only load trusted model files.
    with open("ridgeNoOdds.obj", "rb") as modelFile:
        model = pickle.load(modelFile)
    predictedValues = model.predict(features)
    return predictedValues.argmin()

def ridgeFullPredict(race):
    """Pick the expected winner using the model stored in ``ridgeFull.obj``.

    The model is unpickled on every call and run on a copy of ``race`` with
    all of its columns.

    Returns the position (row number within ``race``) of the runner with the
    lowest predicted value (presumably the predicted finishing position, so
    lowest means best).
    """
    features = race.copy()  # keep the caller's frame untouched
    # NOTE: pickle.load can execute arbitrary code; only load trusted model files.
    with open("ridgeFull.obj", "rb") as modelFile:
        model = pickle.load(modelFile)
    predictedValues = model.predict(features)
    return predictedValues.argmin()

def racePredicter(race, uncleaned=True):
    """Predict the finishing order of a race by repeated majority vote.

    On each pass all five models nominate the runner they expect to win among
    the runners not yet placed. The runner with the most votes is placed next
    and removed, and the process repeats until no runners remain.

    Parameters
    ----------
    race : pandas.DataFrame
        One row per runner.
    uncleaned : bool, default True
        When True, ``race`` is first passed through ``clean_df`` and a
        ``Position`` column, if present, is dropped. Pass False when the
        frame is already in the form the models expect.

    Returns
    -------
    pandas.DataFrame
        The runners' rows ordered from predicted first to predicted last,
        keeping their original index labels. An empty race is returned as is.
    """
    if uncleaned:
        race = clean_df(race)
        # Only drop the column when it exists; never replace the frame with None.
        if "Position" in race.columns:
            race = race.drop("Position", axis="columns")

    raceOrder = []

    # Each pass places exactly one runner, so len(race) passes place them all.
    for _ in range(len(race)):
        votes = [
            classifierPredict(race=race),
            regFullPredict(race=race),
            regNoOddsPredict(race=race),
            ridgeFullPredict(race=race),
            ridgeNoOddsPredict(race=race),
        ]
        fallback = votes[1]  # the XGB regressor had the best accuracy on its own

        # Count every model's vote (all five) without relying on the
        # version-dependent return shape of scipy.stats.mode.
        candidates, counts = np.unique(votes, return_counts=True)
        leaders = candidates[counts == counts.max()]

        if len(leaders) > 1:
            # No single most-voted runner: either every model disagrees or the
            # votes split 2:2:1. Defer to the XGB regressor.
            bestIndex = fallback
        else:
            bestIndex = int(leaders[0])

        # Pull the chosen row out by position; this avoids the double transpose
        # that would upcast column dtypes before the next round of predictions.
        raceOrder.append(race.iloc[bestIndex])
        race = race.drop(race.index[bestIndex])

    if not raceOrder:
        # Nothing to rank; pd.concat would raise on an empty list.
        return race
    return pd.concat(raceOrder, axis=1).T
            
    

In [5]:
# Load the example race card and display the predicted finishing order.
sample = pd.read_csv("sampleRace.csv")
predictedOrder = racePredicter(sample)
print(predictedOrder)

      Raceid  RaceType  Saddle  Age  Ability  Fitness  Conditions  Market  \
3   482820.0       0.0     4.0  3.0      2.0      1.0         1.0     2.0   
11  482823.0       6.0     1.0  3.0      2.0      1.0         1.0     2.0   
9   482820.0       0.0    12.0  3.0      1.0      2.0         2.0     2.0   
12  482823.0       6.0     2.0  5.0      1.0      1.0         1.0     2.0   
2   482820.0       0.0     3.0  7.0      2.0      1.0         2.0     2.0   
14  482823.0       6.0     4.0  5.0      3.0      1.0         3.0     3.0   
8   482820.0       0.0    11.0  3.0      1.0      2.0         1.0     3.0   
5   482820.0       0.0     6.0  3.0      1.0      2.0         2.0     2.0   
7   482820.0       0.0    10.0  3.0      1.0      2.0         2.0     3.0   
6   482820.0       0.0     9.0  3.0      3.0      2.0         2.0     3.0   
13  482823.0       6.0     3.0  4.0      3.0      1.0         3.0     3.0   
1   482820.0       0.0     2.0  4.0      3.0      3.0         2.0     3.0   