In [1]:
from sklearn.svm import SVC 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_classif, chi2

# Load the per-runner feature table (one row per horse per race).
# NOTE(review): pd.read_pickle can execute arbitrary code stored in the file;
# only load pickles that come from a trusted source.
runs = pd.read_pickle("Data/main_1.df")

# Drop the first 10% of rows (by position, in the file's original order).
# NOTE(review): presumably these early rows lack history-based features -- confirm.
runs = runs.iloc[int(len(runs)*.1):,:]

# The evaluation helpers walk the test rows race by race using consecutive
# positional slices, so every race's rows must be contiguous and the races must
# appear in ascending race_id order.  A stable sort guarantees that while
# keeping the original order of runners inside each race.
runs = runs.sort_values("race_id", kind="mergesort")

race_ids = np.unique(runs["race_id"])
num_races = len(race_ids)

FEATURES = ["horse_no", "horse_age", "horse_rating", "declared_weight", "actual_weight",
            "win_odds", "draw", #"race_size", "distance", "race_class",
            'last_race_result','win_percent',
            'avg_distance_time', 'normal_avg_distance_time',
            'going_type_record', 'actual_weight_scaled',
            'declared_weight_scaled', 'horse_race_count', "jockey_record",
            'trainer_record', 'horse_record', 'surface_record', 'place_odds',
            'weight_change', 'weight_change_over_time','weight_change_from_average', 'weight_change_increase',
            'venue_change','venue_record', 'days_since_last_race', 'new_horse',
            'best_odds', 'best_win_percent', 'best_distance_time', 'best_going_record',
            'best_horse_record', 'best_jockey_record','best_trainer_record', 'highest_actual_weight',
            'lowest_actual_weight', 'start_speed', 'rode_before']


# Label column: 'won' or 'placed'.  The evaluation helpers branch on this value.
TARGET = 'won'
#TARGET = 'placed'

X = runs[FEATURES]
y = runs[TARGET]

# Split by race rather than by row so that no race straddles train and test:
# races with race_id <= max_race_id are used for training, later ones for testing.
testPct = 0.2
trainIndex = int(num_races * (1-testPct))
max_race_id = race_ids[trainIndex]

is_train = runs["race_id"] <= max_race_id
is_test = runs["race_id"] > max_race_id
X_train = X.loc[is_train]
y_train = y.loc[is_train]
X_test = X.loc[is_test]
y_test = y.loc[is_test]

# Number of runners in each test race, in ascending race_id order (the same
# order as the rows of X_test / y_test).  np.unique counts them in one pass
# instead of re-scanning the whole frame once per race.
_, test_race_counts = np.unique(runs.loc[is_test, "race_id"], return_counts=True)
race_sizes_for_eval = test_race_counts.tolist()

In [2]:
# Create our evaluate function
def winnerEval(model, x_test, y_test, race_sizes, target=None):
    """Score a fitted classifier race by race.

    The rows of ``x_test`` / ``y_test`` are consumed in consecutive chunks whose
    lengths are given by ``race_sizes``; each chunk is treated as one race.

    Parameters
    ----------
    model : object exposing ``predict_proba(x_test)``; column 1 of the result
        is used as the per-runner score.
    x_test : feature matrix passed straight to ``model.predict_proba``.
    y_test : array-like of labels aligned row-for-row with ``x_test``.
    race_sizes : iterable of ints, number of rows in each race, in row order.
    target : ``'won'`` or ``'placed'``; defaults to the module-level ``TARGET``.

    Returns
    -------
    float
        For ``'won'``: fraction of races where the top-scored runner is the
        runner with the highest label.  Otherwise: number of actual top-3
        positions that also appear in the predicted top-3, divided by
        ``3 * len(race_sizes)``.

    Raises
    ------
    ZeroDivisionError
        If ``race_sizes`` is empty.
    """
    if target is None:
        target = TARGET

    # convert preds into an actual win choice
    win_probs = model.predict_proba(x_test)[:, 1]

    # Work on a plain ndarray: the predictions are positional, so the truth
    # must be positional too.  Older pandas versions return the index *label*
    # from np.argmax on a Series, which never lines up with a position.
    actual = np.asarray(y_test)

    hits = 0
    start = 0
    for size in race_sizes:
        stop = start + size
        race_probs = win_probs[start:stop]
        race_actual = actual[start:stop]

        if target == 'won':
            if np.argmax(race_probs) == np.argmax(race_actual):
                hits += 1

        elif target == 'placed':
            # Positions (within the race) of the three highest scores / labels.
            pred_placers = race_probs.argsort()[-3:]
            act_placers = race_actual.argsort()[-3:]
            hits += int(np.isin(act_placers, pred_placers).sum())

        start = stop

    if target == 'won':
        return hits / float(len(race_sizes))
    return hits / float(len(race_sizes) * 3)

# DO ANOTHER FUNCTION THAT IS JUST FINDING THE BEST ODDS HORSE
def bestOddsEval(x_test, y_test, race_sizes, target=None):
    """Baseline that always backs the runner(s) with the lowest odds.

    The rows of ``x_test`` / ``y_test`` are consumed in consecutive chunks whose
    lengths are given by ``race_sizes``; each chunk is treated as one race.

    Parameters
    ----------
    x_test : table indexable by column name; ``"win_odds"`` is read when
        ``target == 'won'`` and ``"place_odds"`` when ``target == 'placed'``.
    y_test : array-like of labels aligned row-for-row with ``x_test``.
    race_sizes : iterable of ints, number of rows in each race, in row order.
    target : ``'won'`` or ``'placed'``; defaults to the module-level ``TARGET``.

    Returns
    -------
    float
        For ``'won'``: fraction of races where the lowest-``win_odds`` runner
        is the runner with the highest label.  Otherwise: number of actual
        top-3 positions that are also among the three lowest ``place_odds``,
        divided by ``3 * len(race_sizes)``.

    Raises
    ------
    ZeroDivisionError
        If ``race_sizes`` is empty.
    """
    if target is None:
        target = TARGET

    # Use plain ndarrays so that argmin/argmax/argsort are positional within a
    # race; older pandas versions return index *labels* from np.argmin /
    # np.argmax on a Series, so predicted and actual winners never line up.
    # NOTE(review): assumes the odds columns contain no NaN -- confirm.
    actual = np.asarray(y_test)
    if target == 'won':
        odds = np.asarray(x_test["win_odds"])
    elif target == 'placed':
        odds = np.asarray(x_test["place_odds"])
    else:
        odds = None

    hits = 0
    start = 0
    for size in race_sizes:
        stop = start + size
        race_actual = actual[start:stop]

        if target == 'won':
            # Lowest odds == market favourite.
            if np.argmin(odds[start:stop]) == np.argmax(race_actual):
                hits += 1

        elif target == 'placed':
            pred_placers = odds[start:stop].argsort()[:3]
            act_placers = race_actual.argsort()[-3:]
            hits += int(np.isin(act_placers, pred_placers).sum())

        start = stop

    if target == 'won':
        return hits / float(len(race_sizes))
    return hits / float(len(race_sizes) * 3)

def randEval(race_sizes, seed=None):
    """Simulate the accuracy of picking one runner uniformly at random per race.

    For every race two independent uniform draws in ``[0, size)`` are made --
    one standing in for the guess, one for the winner -- and the race counts
    as a hit when they coincide.

    Parameters
    ----------
    race_sizes : sequence of positive ints, number of runners in each race.
    seed : optional int.  When given, a private ``np.random.RandomState(seed)``
        is used so the result is reproducible; when ``None`` (default) the
        global NumPy random state is used, as before.

    Returns
    -------
    float
        Fraction of races in which the two draws matched.

    Raises
    ------
    ZeroDivisionError
        If ``race_sizes`` is empty.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)

    hits = 0
    for size in race_sizes:
        guess = rng.randint(size)
        winner = rng.randint(size)
        if guess == winner:
            hits += 1
    return hits / float(len(race_sizes))

# Baseline: simulated accuracy of a uniformly random pick in each test race.
randAcc = randEval(race_sizes_for_eval)
# Lowest-odds baseline, currently disabled.
#oddsAcc = bestOddsEval(X_test, y_test, race_sizes_for_eval)

In [9]:
# Train a support-vector classifier (SVC) with probability estimates enabled
# and score it race by race on the held-out races.
# NOTE(review): svc_pipe (SelectKBest -> SVC) is constructed below but never
# fitted or evaluated; the bare svc_model is what gets trained, on every column
# of X_train, so `k` currently has no effect on the reported accuracy.
# NOTE(review): chi2 feature scoring expects non-negative inputs -- confirm the
# features satisfy that (or switch to f_classif) before fitting svc_pipe.
k = 15
svc_model = SVC(C=1, max_iter=5000, probability=True)
svc_pipe = Pipeline([('skb', SelectKBest(chi2, k = k)),
                    ('model', svc_model)])
svc_model.fit(X_train, y_train)

print("SVC accuracy: {:.3f}".format(winnerEval(svc_model, X_test, y_test, race_sizes_for_eval)))

# Random-pick baseline for comparison.
print("Random guessing accuracy: {:.3f}".format(randAcc))
# Lowest-odds baseline output, disabled together with oddsAcc.
#print("Betting best odds accuracy: {:.3f}".format(oddsAcc))



SVC accuracy: 0.032
Random guessing accuracy: 0.093


In [15]:
# Show column 1 of the predicted probabilities for every test row.
print(svc_model.predict_proba(X_test)[:, 1])

[[0.91579483 0.08420517]]


In [16]:
# Keep the full predict_proba output for the test rows for inspection below.
a = svc_model.predict_proba(X_test)

In [22]:
# Print each value of column 1 of `a` on its own line.
win_column = a[:, 1]
for v in win_column:
    print(v)

0.0842051718444115
0.08498047162605138
0.07500802617266496
0.07822408356078499
0.07908084902068571
0.0741131415876787
0.07428880772730712
0.07688538977118248
0.09440221523619972
0.07208032162950302
0.08709940249735258
0.07446804112120507
0.08572233842447831
0.08107629118799224
0.0676534271389987
0.07001168802744803
0.06962437739108132
0.06749427439450027
0.08822986500054562
0.07780583098991758
0.06718727720074598
0.06794169120762834
0.08215825321403496
0.08380468324454966
0.07737556984991638
0.07990146656605239
0.07398946397897548
0.06958088177479904
0.06631806942843832
0.06563582197457428
0.0685779352975231
0.0638100830164851
0.06592913186757238
0.07338140365995081
0.06774578759990789
0.070314866009819
0.06336611278419348
0.06529755077785371
0.0769682341103134
0.07222457541985622
0.06479400032173896
0.06258206957581407
0.06344238645056581
0.07203843848863542
0.06388131502772391
0.06224968112991244
0.06504045061277514
0.06449337711244185
0.08007922992125999
0.06339533980380414
0.069263

0.08059254378608591
0.0690457131961843
0.06917546309600539
0.06441302035922584
0.06436161605592523
0.07117580335009854
0.07356224750158985
0.0653641859336277
0.08537442241693627
0.07763930113975435
0.07978169828778539
0.06608351039236401
0.06762524794111706
0.06468258903548126
0.081597279108078
0.07208027474001728
0.0829407456448326
0.08259216136405055
0.09686042224235437
0.07294626325454016
0.09160030608390297
0.08310364818513094
0.10204054068274647
0.09111468219427553
0.09492250509555311
0.07816918303311751
0.07093477905282497
0.06404556279153426
0.06622073427819836
0.06474222106495965
0.06692287770885488
0.06951147106473521
0.06462304130186343
0.06433469721704164
0.07795292533445708
0.07071998704107475
0.08286188865476894
0.06648618470755929
0.0708384236053032
0.06445731271391965
0.06570706910880746
0.06843826918821477
0.06780456309369225
0.06431170586720314
0.06300536011067134
0.06816690573964408
0.06358292360450078
0.06273681786239382
0.062147895043572
0.0643671213329021
0.0674090

0.06687019928297377
0.06742211450474207
0.07268440936926186
0.08326332176540394
0.12806469765959383
0.08497103526127672
0.08116986392542419
0.08015405840265202
0.08440320903531172
0.09992064044494545
0.0795841583654479
0.093246143175347
0.06923830662183861
0.08197450410232311
0.06967224517082359
0.06988108316813989
0.0687284268234688
0.07027440330689026
0.06552528413676421
0.08601456172662027
0.0681957910100078
0.0884193395000003
0.06671986310713619
0.06498753970910673
0.08219628703266987
0.06966788865566294
0.07055312097407232
0.08783322312638428
0.08247328154006713
0.07661771759277818
0.06573994003065088
0.08565233462605497
0.0883153194948896
0.06505232877400881
0.08048712360224244
0.06966863206697117
0.09596473230185992
0.09029545525094425
0.09059708033310572
0.08924380148901143
0.08604006147200305
0.07283701436027654
0.06895971726636532
0.08757256262712892
0.08902358605892212
0.09023337835350738
0.08603629295454511
0.11071047739371546
0.10176417944219421
0.0814828611140834
0.078014

0.08640601214378484
0.06395242640428965
0.08705755096260037
0.06398607989563294
0.07704978534483617
0.06517823391731202
0.07430700543312371
0.06570707911636685
0.07204365647863019
0.06527985140965514
0.07138381980192257
0.0688404503502377
0.06669653925486826
0.07418619149514144
0.06807631138254325
0.06809957014156538
0.07199325348483562
0.06655856315628607
0.06945891609067695
0.08619752948946555
0.08921987773305161
0.08347189847321637
0.06623053763811745
0.0657456277790686
0.08172809268031947
0.08211640261498598
0.06764801941641273
0.06698281355258097
0.06586916383880759
0.06518102502953299
0.08079208831553246
0.09049874035210663
0.06530271961349163
0.06283482552407234
0.08298085484162325
0.06829449679549038
0.0689841155288672
0.06654442022614117
0.07099293631788334
0.06828103342584288
0.06795090108154986
0.07127765462345656
0.06406996819511027
0.06669461603089238
0.07637163623461415
0.0816861734553439
0.09128361629152468
0.08253892114555332
0.08686499514058338
0.08364464463804894
0.09

0.08372674442926215
0.09674258335537748
0.09842035195709173
0.07482522798815575
0.07015701979077744
0.08480775528086136
0.08566474590923667
0.0936276580357961
0.06945942321226788
0.07981337108275616
0.07913731316954407
0.0798027015715514
0.06862905951986886
0.07562684821588875
0.07886750632517613
0.07240687130638612
0.06885402843025634
0.07949161613121292
0.06457308418810974
0.06371287015673463
0.0649242707793819
0.06620908381794653
0.07280385792172986
0.06919111570925032
0.06693624379013631
0.06886804642068133
0.06888887457526516
0.06825426259345437
0.0653756195839407
0.06550165229474755
0.06233559390489163
0.08530984149247657
0.0660337777454625
0.07620810405613068
0.07651285183882797
0.06309032939625102
0.06587260108309735
0.06498700807565316
0.06770726924184921
0.06388508925742338
0.07179812559724788
0.10642555486864096
0.07251913172334087
0.08011119713012244
0.06355794409006707
0.06510948729516931
0.07135281298963102
0.06588811156406899
0.07676575132562698
0.07668822743751769
0.064

0.09422886563120932
0.08617890990100888
0.09803464734019869
0.07598544373456569
0.06835259219756959
0.06739650342460705
0.08989518355110675
0.06679038177166423
0.07051957443609867
0.07290042819466586
0.06546698176988368
0.08363484351704759
0.08348759266471065
0.08566175895622136
0.09152708450481034
0.09504377003904832
0.07633597697570829
0.07021268110986038
0.06641507765156865
0.07530713155712632
0.06654738106235301
0.06862922003230736
0.06942069612031611
0.06857915448588613
0.06735906442809828
0.07148858309492201
0.06559094047307303
0.07636286908925866
0.06469338122512967
0.068106154808259
0.06265576659128998
0.06591506777590574
0.08422911808476681
0.07739659247998552
0.08638271143374482
0.07762517903947345
0.09904072852695786
0.0661147400472834
0.06876552387943839
0.0738424684562911
0.0665616045063832
0.06516173658599632
0.0650306441061365
0.07175715066915084
0.06704100384722618
0.08367006031560666
0.06340774680439956
0.09278887887203953
0.06737462922004812
0.06847144693179151
0.0638