In [10]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Dropout
from keras_tqdm import TQDMNotebookCallback
import keras.callbacks

from sqlalchemy import create_engine
import json
from operator import itemgetter

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [12]:
version = 'v3'

# race_type = 'R'
race_type = 'G'
# race_type = 'H'

bet_type = 'W'
# bet_type = 'P'

In [13]:
engine = create_engine('sqlite:///../../data/race.db')
conn = engine.connect()
dfoo = pd.read_sql_table('race', conn)
print('{} races loaded!'.format(len(dfoo)))

37902 races loaded!


In [14]:
dfo = dfoo.loc[dfoo['race_type'].isin([race_type])]
print('{} {} races!'.format(len(dfo), race_type))

15515 G races!


In [15]:
dfo['results'] = dfo['results_data'].map(json.loads)
dfo['runners'] = dfo['runners_data'].map(json.loads)
dfo.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,id,meeting_name,location,venue_mnemonic,race_type,meeting_date,race_number,race_name,race_start_time,race_status,race_distance,results_data,num_runners,runners_data,quinella,exacta,trifecta,first_four,results,runners
11,16,CANBERRA,ACT,C,G,2017-10-01,1,THE CLIMATE DOCTOR MAIDEN,2017-10-01 05:57:00,Paying,310,"[[1], [7], [2], [3]]",8.0,"[{""runnerName"": ""SUTTON LEWIS"", ""runnerNumber""...",2.1,3.6,15.3,72.0,"[[1], [7], [2], [3]]","[{'runnerName': 'SUTTON LEWIS', 'runnerNumber'..."
12,19,CANBERRA,ACT,C,G,2017-10-01,4,TOLEDO TRAILERS STAKES,2017-10-01 07:00:00,Paying,600,"[[3], [8], [7], [6]]",7.0,"[{""runnerName"": ""YOUNG THUG"", ""runnerNumber"": ...",30.8,139.8,780.9,3155.8,"[[3], [8], [7], [6]]","[{'runnerName': 'YOUNG THUG', 'runnerNumber': ..."
13,20,CANBERRA,ACT,C,G,2017-10-01,5,TAB WE LOVE A BET STAKES,2017-10-01 07:18:00,Paying,530,"[[4], [6], [8], [5]]",8.0,"[{""runnerName"": ""UNCHANGEABLE"", ""runnerNumber""...",24.5,21.4,75.0,479.3,"[[4], [6], [8], [5]]","[{'runnerName': 'UNCHANGEABLE', 'runnerNumber'..."


In [16]:
dfo = dfo[dfo['num_runners'] == 8]
print(len(dfo))

11350


In [42]:
# build data
X, Y = [], []
for ri, race in dfo.iterrows():
    x, y = [], []
    runners = [r for r in race['runners'] if r['has_odds']]
    if len(runners) != 8:
        continue
    runners = sorted(runners, key=itemgetter('fwo'))
    for r in runners:
        x.extend([r['fwp'], r['fpp'], r['twp'], r['tpp']])
        y.append(int(r['finishingPosition'] == 1))
    X.append(x)
    Y.append(y)
print('X:{} Y:{}'.format(len(X), len(Y)))

X:11350 Y:11350


In [43]:
# For a single-input model with 2 classes (binary classification):

n = 8 * 4
print('input dimension = {}'.format(n))

epochs = 10000
print('epochs = {}'.format(epochs))

layer_1 = 64
layer_2 = 64
dropout = 0.2

tag = '{}{}x{}{}'.format(race_type, layer_1, layer_2, bet_type)
print('tag = {}'.format(tag))
file_name = '/Users/jaco/code/tabby/each_way/{}/models/{}.h5'.format(version, tag)

try:
    model = load_model(file_name)
    print('model loaded')
except OSError:
    model = Sequential()
    model.add(Dense(layer_1, activation='relu', input_dim=n))
    model.add(Dropout(dropout))    
    model.add(Dense(layer_2, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(8, activation='linear'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print('model created')

input dimension = 32
epochs = 10000
tag = G64x64W
model loaded


In [44]:
# Train the model, iterating on the data in batches of 32 samples
tqdm = TQDMNotebookCallback()

tbCallBack = keras.callbacks.TensorBoard(
    log_dir='/Users/jaco/code/tabby/each_way/{}/summary/{}'.format(version, tag), 
    histogram_freq=0,
    write_graph=True,
    write_images=True)

X_train, X_test = X[:-1000], X[-1000:]
Y_train, Y_test = Y[:-1000], Y[-1000:]

model.fit(X, Y,
    epochs=epochs,
    batch_size=128,
    verbose=0,
    callbacks=[tqdm, tbCallBack])

# creates a HDF5 file
model.save(file_name)
print('model saved')

score = model.evaluate(X_test, Y_test, batch_size=128)
print('')
for l, v in zip(model.metrics_names, score):
    print('{}: {}'.format(l, v))

model saved
 128/1000 [==>...........................] - ETA: 0s
loss: 0.0951937443614006
acc: 0.3679999985694885
