In [1]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Dropout
from keras_tqdm import TQDMNotebookCallback
import keras.callbacks

from sqlalchemy import create_engine
import json

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Run configuration. `version` is interpolated into the model and TensorBoard
# output paths, `race_type` selects which rows are kept for training, and
# both `race_type` and `bet_type` are embedded in the model tag.
version, bet_type, race_type = 'v1', 'Q', 'R'

In [13]:
# Load the whole `exotic` table from the SQLite database into a DataFrame.
# The connection is scoped to the read with a context manager so it is
# released as soon as the table is in memory, rather than staying open for
# the life of the kernel.
engine = create_engine('sqlite:///../../data/exotic.db')
with engine.connect() as conn:
    dfoo = pd.read_sql_table('exotic', conn)
print('{} exotics loaded!'.format(len(dfoo)))

307571 exotics loaded!


In [14]:
# Keep only the rows whose race_type matches the configured one.
# NOTE(review): `bet_type` is part of the model tag but is not used as a
# filter here -- confirm the table only ever holds a single bet type.
race_type_mask = dfoo['race_type'].isin([race_type])
dfo = dfoo.loc[race_type_mask]
print('{} {} races!'.format(len(dfo), race_type))

307571 R races!


In [19]:
# Eyeball the last 10 filtered rows as a sanity check on the loaded columns.
dfo.tail(10)

Unnamed: 0,id,race_type,bet_type,res1,res2,res3,res4,num_runners,run1_num,run1_win_perc,...,run4_num,run4_win_perc,run4_win_scaled,run4_win_rank,run4_place_perc,run4_place_scaled,run4_place_rank,prediction,success,dividend
307561,307562,R,Q,6,5,11,9,13.0,11,0.038168,...,,,,,,,,,0,89.1
307562,307563,R,Q,6,5,11,9,13.0,11,0.038168,...,,,,,,,,,0,89.1
307563,307564,R,Q,6,5,11,9,13.0,11,0.038168,...,,,,,,,,,0,89.1
307564,307565,R,Q,6,5,11,9,13.0,11,0.038168,...,,,,,,,,,0,89.1
307565,307566,R,Q,6,5,11,9,13.0,5,0.032154,...,,,,,,,,,0,89.1
307566,307567,R,Q,6,5,11,9,13.0,5,0.032154,...,,,,,,,,,0,89.1
307567,307568,R,Q,6,5,11,9,13.0,5,0.032154,...,,,,,,,,,0,89.1
307568,307569,R,Q,6,5,11,9,13.0,9,0.032154,...,,,,,,,,,0,89.1
307569,307570,R,Q,6,5,11,9,13.0,9,0.032154,...,,,,,,,,,0,89.1
307570,307571,R,Q,6,5,11,9,13.0,13,0.012048,...,,,,,,,,,0,89.1


In [20]:
# Target vector: the `success` column cast to int.
Y = dfo['success'].astype(int)

# Summary statistics of the labels; the mean is the fraction of positives.
Y.describe()

count    307571.000000
mean          0.011379
std           0.106066
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           1.000000
Name: success, dtype: float64

In [22]:
# Columns that make up the feature matrix, in model-input order:
# the field size followed by six win/place statistics for each of the
# two runners in the combination.
feature_columns = [
    'num_runners',
    # runner 1
    'run1_win_perc', 'run1_win_scaled', 'run1_win_rank',
    'run1_place_perc', 'run1_place_scaled', 'run1_place_rank',
    # runner 2
    'run2_win_perc', 'run2_win_scaled', 'run2_win_rank',
    'run2_place_perc', 'run2_place_scaled', 'run2_place_rank',
]

# Select the features in one step; the result keeps dfo's index.
X = dfo[feature_columns]
X.describe()

Unnamed: 0,num_runners,run1_win_perc,run1_win_scaled,run1_win_rank,run1_place_perc,run1_place_scaled,run1_place_rank,run2_win_perc,run2_win_scaled,run2_win_rank,run2_place_perc,run2_place_scaled,run2_place_rank
count,307571.0,307571.0,307571.0,307571.0,307571.0,307571.0,307571.0,307571.0,307571.0,307571.0,307571.0,307571.0,307571.0
mean,11.612854,0.160377,0.13555,0.757941,0.418737,0.120957,0.74652,0.054387,0.046008,0.433814,0.209458,0.060601,0.453725
std,2.501838,0.123352,0.103836,0.202889,0.204893,0.062255,0.21581,0.047745,0.040373,0.212575,0.140881,0.04218,0.225546
min,3.0,0.00312,0.002561,0.111111,0.013263,0.003719,0.055556,0.001122,0.000946,0.055556,0.007479,0.002242,0.055556
25%,10.0,0.071429,0.06058,0.625,0.25641,0.073852,0.6,0.020964,0.017732,0.266667,0.103093,0.029345,0.277778
50%,12.0,0.126582,0.107661,0.8,0.4,0.112658,0.785714,0.039216,0.033225,0.416667,0.175439,0.049771,0.428571
75%,13.0,0.212766,0.180827,0.928571,0.555556,0.156542,0.923077,0.071942,0.06087,0.583333,0.277778,0.080537,0.615385
max,18.0,0.961538,0.80156,1.0,1.0,0.946292,1.0,0.5,0.417292,1.0,1.0,0.488712,1.0


In [24]:
# Binary classifier over the feature matrix X: two ReLU hidden layers
# followed by a single sigmoid output unit.
# NOTE(review): the label summary earlier in the notebook shows ~1.1%
# positives, so the 'accuracy' metric will look high even for a model that
# always predicts 0 -- consider an additional metric.

n = X.shape[1]
print('input dimension = {}'.format(n))

epochs = 100
print('epochs = {}'.format(epochs))

# Width of the two hidden layers.
layer_1 = layer_2 = 40

# The tag identifies this race type / architecture / bet type combination
# and names the saved model file.
tag = '{}{}x{}{}'.format(race_type, layer_1, layer_2, bet_type)
print('tag = {}'.format(tag))
file_name = '/Users/jaco/code/tabby/exotic/{}/models/{}.h5'.format(version, tag)

try:
    # Resume from a previously saved model with the same tag when possible.
    model = load_model(file_name)
except OSError:
    # load_model could not read the file (presumably it does not exist yet):
    # build and compile a fresh network instead.
    model = Sequential([
        Dense(layer_1, activation='relu', input_dim=n),
        Dense(layer_2, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    print('model created')
else:
    print('model loaded')

input dimension = 13
epochs = 100
tag = R40x40Q
model created


In [25]:
# Train the model, iterating on the data in batches of 32 samples
# Progress is reported through the notebook tqdm callback (verbose=0 below
# silences Keras' own per-epoch logging).
tqdm = TQDMNotebookCallback()

# TensorBoard logs go to a per-version, per-tag summary directory.
tbCallBack = keras.callbacks.TensorBoard(
    log_dir='/Users/jaco/code/tabby/exotic/{}/summary/{}'.format(version, tag),
    histogram_freq=0,
    write_graph=True,
    write_images=True)

# `.values` yields the same NumPy arrays that `as_matrix()` used to return;
# `as_matrix()` was deprecated in pandas 0.23 and removed in 1.0, so calling
# it fails with AttributeError on current pandas.
# NOTE(review): per the Keras docs, validation_split takes the *last* 20% of
# the rows before shuffling -- confirm that the row order of X/Y makes that a
# representative hold-out set.
model.fit(
    X.values,
    Y.values,
    validation_split=0.2,
    shuffle=True,
    epochs=epochs,
    batch_size=32,
    verbose=0,
    callbacks=[tqdm, tbCallBack])

# creates a HDF5 file
model.save(file_name)
print('model saved')


model saved
