In [33]:
import pandas as pd
import numpy as np
import tensorflow as tf
import sklearn.preprocessing as preprocessing
import sklearn.model_selection as model_selection
import matplotlib.pyplot as plt
import keras
from keras.models import model_from_json
import pickle
from collections import OrderedDict

In [3]:
# load json and create model
model_json = 'model-128-256-128-64-19(all relu)-20210112-23-21.json'
json_file = open(model_json, 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)


In [4]:
print(model)

<tensorflow.python.keras.engine.sequential.Sequential object at 0x000001B599E896D0>


In [5]:
# load weights into new model
model_name = 'model-128-256-128-64-19(all relu)-20210112-23-21-epoch=145-val_loss=0.046082.hdf5'
model.load_weights(model_name)

In [7]:
# load standard scaler
ss = pickle.load(open('StandardScaler.pkl', 'rb'))

In [17]:
# load LabelEncoder for Venue Name
venue_encoder_name = 'VenueName_Encoder.pkl'
venue_le = pickle.load(open(venue_encoder_name, 'rb'))

In [18]:
# load LabelEncoder for row
row_encoder_name = 'Row_Encoder.pkl'
row_le = pickle.load(open(row_encoder_name, 'rb'))

In [19]:
# load LabelEncoder for Trainer
trainer_encoder_name = 'Trainer_Encoder.pkl'
trainer_le = pickle.load(open(trainer_encoder_name, 'rb'))

In [20]:
# load LabelEncoder for Driver
driver_encoder_name = 'Driver_Encoder.pkl'
driver_le = pickle.load(open(driver_encoder_name, 'rb'))

In [57]:
future_races_df = pd.read_csv('dataset4(AutoRecovered).csv', index_col=False)
print(future_races_df)

          day    venuename  raceno  weather trackcondition         runnername  \
0    1/2/2016  Albion Park       1        3           GOOD        Jack Malone   
1    1/2/2016  Albion Park       1        3           GOOD              Vader   
2    1/2/2016  Albion Park       1        3           GOOD     Adriatic Coast   
3    1/2/2016  Albion Park       1        3           GOOD         Tuxedo Max   
4    1/2/2016  Albion Park       1        3           GOOD        True Desire   
..        ...          ...     ...      ...            ...                ...   
190  1/2/2016      Geelong       7        1           GOOD    Desdon Murruffy   
191  1/2/2016      Geelong       7        1           GOOD  Charlie Machsheen   
192  1/2/2016      Geelong       7        1           GOOD        Abettorpunt   
193  1/2/2016      Geelong       7        1           GOOD    Glenferrie Hood   
194  1/2/2016      Geelong       7        1           GOOD    Hilltop Hustler   

     win_odds_bsp  poolsize

In [58]:
# choose the important features from dataframe
feature_df = future_races_df[['daycalender', 'racenumber', 'venue', 'racedistance', 'horseid', 'horsename', 'row', 'trainer', 'driver', 'handicap', 'age']].copy()
feature_df = feature_df.rename(columns={"daycalender": "day", "venue": "venuename", "racenumber": "raceno"})


          day  raceno    venuename  racedistance  horseid          horsename  \
0    1/2/2016       1  Albion Park          1660   773852        JACK MALONE   
1    1/2/2016       1  Albion Park          1660   770866              VADER   
2    1/2/2016       1  Albion Park          1660   787614     ADRIATIC COAST   
3    1/2/2016       1  Albion Park          1660   792645         TUXEDO MAX   
4    1/2/2016       1  Albion Park          1660   541224        TRUE DESIRE   
..        ...     ...          ...           ...      ...                ...   
190  1/2/2016       7      Geelong          2570   762083    DESDON MURRUFFY   
191  1/2/2016       7      Geelong          2570   228737  CHARLIE MACHSHEEN   
192  1/2/2016       7      Geelong          2570   219387        ABETTORPUNT   
193  1/2/2016       7      Geelong          2570   779693    GLENFERRIE HOOD   
194  1/2/2016       7      Geelong          2570   785282    HILLTOP HUSTLER   

     row          trainer        driver

In [59]:
# apply Encoders
# venue encoder
feature_df['venuename'] = venue_le.transform(feature_df['venuename'])
# row encoder
feature_df['row'] = row_le.transform(feature_df['row'])
# trainer encoder
feature_df['trainer'] = trainer_le.transform(feature_df['trainer'])
# driver encoder
feature_df['driver'] = driver_le.transform(feature_df['driver'])

In [60]:
# remove duplicated rows
feature_df = feature_df.drop_duplicates()

In [61]:
print(feature_df)

          day  raceno  venuename  racedistance  horseid          horsename  \
0    1/2/2016       1          1          1660   773852        JACK MALONE   
1    1/2/2016       1          1          1660   770866              VADER   
2    1/2/2016       1          1          1660   787614     ADRIATIC COAST   
3    1/2/2016       1          1          1660   792645         TUXEDO MAX   
4    1/2/2016       1          1          1660   541224        TRUE DESIRE   
..        ...     ...        ...           ...      ...                ...   
190  1/2/2016       7         34          2570   762083    DESDON MURRUFFY   
191  1/2/2016       7         34          2570   228737  CHARLIE MACHSHEEN   
192  1/2/2016       7         34          2570   219387        ABETTORPUNT   
193  1/2/2016       7         34          2570   779693    GLENFERRIE HOOD   
194  1/2/2016       7         34          2570   785282    HILLTOP HUSTLER   

     row  trainer  driver  handicap  age  
0      6     2511   

In [95]:
# Create Pivot table

# choose 2 features from race : venuename and racedistance.
# choose 6 features from horses : horseid, row, trainer, driver, handicap, age
# maximum horse number of race is 19.
# total feature number of each race will be 6 * 19 + 2 = 116
group_df = feature_df.groupby(['day', 'raceno', 'venuename', 'racedistance'])
print(group_df)

columns = ['venuename', 'racedistance']
extend_columns = ['venuename', 'racedistance', 'day']
common_columns = ['horseid', 'row', 'trainer', 'driver', 'handicap', 'age']
extend_common_columns = ['horseid', 'horsename', 'row', 'trainer', 'driver', 'handicap', 'age']

max_number = 19
for i in range(1, max_number + 1):
    ith_columns = []
    for column in common_columns:
        ith_columns.append(column + str(i))
    columns += ith_columns
test_df = pd.DataFrame(columns = columns)


for i in range(1, max_number + 1):
    ith_columns = []
    for column in extend_common_columns:
        ith_columns.append(column + str(i))
    extend_columns += ith_columns

place_columns = []
for i in range(1, max_number + 1):
    place_columns.append('place' + str(i))
extend_columns += place_columns

extend_df = pd.DataFrame(columns = extend_columns)
print(extend_columns)


for group_name, df in group_df:
    # print(group_df)
    day, raceno, venuename, racedistance = group_name
    ext_item = OrderedDict()
    item = OrderedDict()
    ext_item['venuename'] = item['venuename'] = venuename
    ext_item['racedistance'] = item['racedistance'] = racedistance
    ext_item['day'] = day
    index = 1
    
    for i, row in df.iterrows():
        ext_item['horseid' + str(index)] = item['horseid' + str(index)] = row['horseid']
        ext_item['row' + str(index)] = item['row' + str(index)] = row['row']
        ext_item['trainer' + str(index)] = item['trainer' + str(index)] = row['trainer']
        ext_item['driver' + str(index)] = item['driver' + str(index)] = row['driver']
        ext_item['horseid' + str(index)] = item['horseid' + str(index)] = row['horseid']
        ext_item['handicap' + str(index)] = item['handicap' + str(index)] = row['handicap']
        ext_item['age' + str(index)] = item['age' + str(index)] = row['age']
        ext_item['horsename' + str(index)] = row['horsename']
        index += 1
    ext_item['horsecnt'] = index - 1
    if index >= max_number:
        continue
    for index1 in range(index, max_number+1):
        ext_item['horseid' + str(index1)] = item['horseid' + str(index1)] = 0
        ext_item['row' + str(index1)] = item['row' + str(index1)] = 0
        ext_item['trainer' + str(index1)] = item['trainer' + str(index1)] = 0
        ext_item['driver' + str(index1)] = item['driver' + str(index1)] = 0
        ext_item['horseid' + str(index1)] = item['horseid' + str(index1)] = 0
        ext_item['handicap' + str(index1)] = item['handicap' + str(index1)] = 0
        ext_item['age' + str(index1)] = item['age' + str(index1)] = 0
    test_df = test_df.append(item, ignore_index = True)
    extend_df = extend_df.append(ext_item, ignore_index = True)
test_df = test_df.fillna(0)
extend_df = extend_df.fillna(0)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001B5CD1A0670>
['venuename', 'racedistance', 'day', 'horseid1', 'horsename1', 'row1', 'trainer1', 'driver1', 'handicap1', 'age1', 'horseid2', 'horsename2', 'row2', 'trainer2', 'driver2', 'handicap2', 'age2', 'horseid3', 'horsename3', 'row3', 'trainer3', 'driver3', 'handicap3', 'age3', 'horseid4', 'horsename4', 'row4', 'trainer4', 'driver4', 'handicap4', 'age4', 'horseid5', 'horsename5', 'row5', 'trainer5', 'driver5', 'handicap5', 'age5', 'horseid6', 'horsename6', 'row6', 'trainer6', 'driver6', 'handicap6', 'age6', 'horseid7', 'horsename7', 'row7', 'trainer7', 'driver7', 'handicap7', 'age7', 'horseid8', 'horsename8', 'row8', 'trainer8', 'driver8', 'handicap8', 'age8', 'horseid9', 'horsename9', 'row9', 'trainer9', 'driver9', 'handicap9', 'age9', 'horseid10', 'horsename10', 'row10', 'trainer10', 'driver10', 'handicap10', 'age10', 'horseid11', 'horsename11', 'row11', 'trainer11', 'driver11', 'handicap11', 'age11', 'horseid12', 'h

In [96]:
print(extend_df)

    venuename  racedistance       day  horseid1         horsename1  row1  \
0           1          1660  1/2/2016    773852        JACK MALONE     6   
1          34          2100  1/2/2016    772434       MACTERRA LAD     3   
2           1          2138  1/2/2016    768592        CEE UIN L A     1   
3          34          2100  1/2/2016    779658           EMMANUEL     4   
4           1          1660  1/2/2016    346189          MONARKMAC     1   
5          34          2100  1/2/2016    786544        CRUZ BROMAC     1   
6           1          2138  1/2/2016    762784     YERRINGTON BOB     1   
7          34          1609  1/2/2016    767546       PIPE ME HOME     5   
8           1          1660  1/2/2016    762540     GALACTIC EAGLE    14   
9          34          2570  1/2/2016    772957    DEAD CAT BOUNCE     1   
10          1          2138  1/2/2016    762546            BEBRAVE     4   
11         34          2100  1/2/2016    776469    ELLAS PUPPETEER     6   
12          

In [72]:
test_x = pd.DataFrame(ss.transform(test_df),columns = test_df.columns)
print(test_x)

    venuename  racedistance  horseid1      row1  trainer1   driver1  \
0   -1.632510     -1.104179  0.116048 -0.085988  0.892568  0.929368   
1   -0.398371      0.531545  0.103234 -0.858562 -1.642511 -1.654867   
2   -1.632510      0.672812  0.068514 -1.373611  1.104255  1.198184   
3   -0.398371      0.531545  0.168516 -0.601037  0.481586  0.515059   
4   -1.632510     -1.104179 -3.748690 -1.373611 -0.424021  1.555681   
5   -0.398371      0.531545  0.230744 -1.373611 -0.856688  0.799117   
6   -1.632510      0.672812  0.016028 -1.373611  1.335562  1.487784   
7   -0.398371     -1.293774  0.059061 -0.343513 -1.181963 -1.196217   
8   -1.632510     -1.104179  0.013823  1.974209  1.335562  1.487784   
9   -0.398371      2.278796  0.107960 -1.373611  0.729415 -1.196217   
10  -1.632510      0.672812  0.013877 -0.601037  1.715566  1.487784   
11  -0.398371      0.531545  0.139697 -0.085988 -0.309400 -0.465980   
12  -1.632510      0.672812 -3.755866 -0.858562  1.215778 -0.407783   
13  -0

In [97]:
test_x = test_x.to_numpy()
y_predictions = model.predict(test_x)

AttributeError: 'numpy.ndarray' object has no attribute 'to_numpy'

In [98]:
print(y_predictions)

[[ 1.14910431e-01  7.99550563e-02  1.03296697e-01  1.67438284e-01
   6.51310161e-02  9.23171490e-02  8.08734074e-02  1.11170083e-01
   1.14020213e-01  8.20027292e-02 -7.04612024e-03 -8.85409303e-03
   1.51349790e-03  2.11975537e-04 -1.31811947e-04 -2.35088402e-04
  -7.53768254e-05  4.51543892e-05  1.43030848e-05]
 [ 1.84639454e-01  9.79956463e-02  7.20942616e-02  9.34953392e-02
   6.08942695e-02  5.84302768e-02  7.59698302e-02  8.92482027e-02
   7.44279474e-02  5.97007275e-02  7.84924105e-02  5.56542464e-02
   5.68299089e-04 -2.21226178e-03 -3.39953881e-03  4.54889727e-04
  -1.19365708e-04 -2.36019303e-04 -2.23563577e-04]
 [ 6.18832856e-02  1.48454815e-01  6.68087453e-02  1.25554845e-01
   1.04465634e-01  5.99144883e-02  7.84393474e-02  6.06465414e-02
   1.27896130e-01  6.92438185e-02  8.25647041e-02  1.56111456e-03
  -1.81824714e-03 -5.53831458e-04 -3.26309260e-03  2.98616942e-04
   5.27637312e-05 -5.04618365e-05  3.31386327e-05]
 [ 1.47349402e-01  1.98253378e-01  1.23966798e-01  1.55

In [99]:
index = 0
for i, row in extend_df.iterrows():
    horsecnt = int(row['horsecnt'])
    p_sum = sum(y_predictions[index][0:horsecnt])
    print(p_sum)
    for j in range(1, horsecnt+1):
        extend_df.loc[i, 'place' + str(j)] = y_predictions[index][j-1] * 100 / p_sum
    index += 1

1.0111150667071342
1.0010426118969917
0.9858723543584347
0.9706894624978304
1.0007696077227592
0.9860665798187256
1.0121343340724707
0.9834569245576859
0.9882650375366211
0.9948965236544609
1.0069478414952755
1.0003139935433865
0.9873202443122864
0.9922156967222691
0.9997829496860504
0.9723258689045906


In [100]:
print(extend_df)

    venuename  racedistance       day  horseid1         horsename1  row1  \
0           1          1660  1/2/2016    773852        JACK MALONE     6   
1          34          2100  1/2/2016    772434       MACTERRA LAD     3   
2           1          2138  1/2/2016    768592        CEE UIN L A     1   
3          34          2100  1/2/2016    779658           EMMANUEL     4   
4           1          1660  1/2/2016    346189          MONARKMAC     1   
5          34          2100  1/2/2016    786544        CRUZ BROMAC     1   
6           1          2138  1/2/2016    762784     YERRINGTON BOB     1   
7          34          1609  1/2/2016    767546       PIPE ME HOME     5   
8           1          1660  1/2/2016    762540     GALACTIC EAGLE    14   
9          34          2570  1/2/2016    772957    DEAD CAT BOUNCE     1   
10          1          2138  1/2/2016    762546            BEBRAVE     4   
11         34          2100  1/2/2016    776469    ELLAS PUPPETEER     6   
12          

In [108]:
for i, row in extend_df.iterrows():
    print("Race Date : " + row['day'])
    venuename = venue_le.inverse_transform([int(row['venuename'])])[0]
    print("Venue Name : " + venuename)
    print("Distance : " + str(row['racedistance']))
    horsecnt = int(row['horsecnt'])
    for index in  range(1, horsecnt+1):
        horsename = row['horsename' + str(index)]
        place = round(row['place' + str(index)],1)
        print("\t" + row['horsename' + str(index)] + "\t : " + str(place))

Race Date : 1/2/2016
Venue Name : Albion Park
Distance : 1660
	JACK MALONE	 : 11.4
	VADER	 : 7.9
	ADRIATIC COAST	 : 10.2
	TUXEDO MAX	 : 16.6
	TRUE DESIRE	 : 6.4
	FOREST FURY	 : 9.1
	END OF THE MATTER	 : 8.0
	LORD JONES	 : 11.0
	BLESSED IS THE BOY	 : 11.3
	ERNIE BARRASSO	 : 8.1
Race Date : 1/2/2016
Venue Name : Geelong
Distance : 2100
	MACTERRA LAD	 : 18.4
	UPANATOM	 : 9.8
	THE FAT MAN	 : 7.2
	LOONG NIEN	 : 9.3
	SPRINGFIELD TATTOO	 : 6.1
	CHANGE GEAR	 : 5.8
	ART PLEASER	 : 7.6
	FIELD MAJOR	 : 8.9
	ROYAL WITNESS	 : 7.4
	WHATS THE BIG IDEA	 : 6.0
	ERIC CLAPTON	 : 7.8
	FOUR SMART ACES	 : 5.6
Race Date : 1/2/2016
Venue Name : Albion Park
Distance : 2138
	CEE UIN L A	 : 6.3
	JOYS A BABE	 : 15.1
	MAGIC OATS	 : 6.8
	MONTANA FALCON	 : 12.7
	COMPTON STREET	 : 10.6
	COLD IDEAL	 : 6.1
	BROADWAY PLAYBOY	 : 8.0
	MISTA MARA	 : 6.2
	FEARLESS LEADER	 : 13.0
	FUNNY BOY	 : 7.0
	STORMFORTHEBOYS	 : 8.4
Race Date : 1/2/2016
Venue Name : Geelong
Distance : 2100
	EMMANUEL	 : 15.2
	STARZZZ OF ICON	 : 20.4
	JUS

In [94]:
a= venue_le.inverse_transform([1])
a

array(['Albion Park'], dtype=object)