# Dependencies

In [2]:
import os

import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation
from keras.utils import to_categorical
from keras.optimizers import SGD


import pandas as pd

import numpy as np
from numpy import argmax

import sklearn
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


# Import Data

In [21]:
loc = os.getcwd()+'/../model/data/'
data = pd.read_pickle(loc+'stats.pkl')

# Data preperation

In [22]:
# features to use 
features = ['odds1','oddsX','odds2','DiffFormPts','DiffPts','DiffGD']
#features = ['DiffFormPts','DiffPts','DiffGD']
colToUse = features + ['ftr']

# drop rows with empty cells
data = data.dropna(subset=colToUse, how='any')

In [11]:
# split to X and Y
X_all = np.array(data.filter(features))
y_all = to_categorical(np.array(data.ftr.astype("category").cat.codes)).astype(int)
n_cols = X_all.shape[1]

# split the dataset into training and testing
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=2)

# Model training 

## 1: Sequence classification with LSTM:
**Best setup so far:**

model.add(Dense(50, activation='relu', input_shape=(n_cols,)))

model.add(Dense(50, activation='relu',))

model.add(Dense(3, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


**Loss: 0.6912296541635766, Acc: 0.7183490680366993**

**Notes**
Test without odds as features gives acc .697

In [12]:
model = Sequential()

model.add(Dense(50, activation='relu', input_shape=(n_cols,)))
model.add(Dense(50, activation='relu',))
model.add(Dense(3, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

## Train with test-split to check acc 

In [13]:
model.fit(X_train, y_train, epochs=5)

test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Loss: {test_loss}, Acc: {test_acc}")

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss: 0.6906432761923218, Acc: 0.7177438876929568


## Train on all data to maximize trainingdata

In [14]:
model.fit(X_all, y_all, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x132bda358>

# Save model

In [None]:
# %% Save the model
model.save('model_201902182100.h5')  # creates a HDF5 file 'my_model.h5'

# Predictions

In [15]:
# make features to np array
features = X_test

# use model to make predictions
predicted = np.array(model.predict(features))

# process predictions and odds so that "Odds Fav" and "Predicted Fav" is appendet to each game
rows_list = []
odds_names = ['Away', 'Draw', 'Home']
odds_order = ['Home', 'Draw', 'Away']
y_names = ['Away', 'Draw', 'Home']
for i in range(features.shape[0]):
    odds = features[i]
    prob = predicted[i]
    winner = y_test[i]
    dict = {'Prob1': prob[2], 'ProbX': prob[1], 'Prob2': prob[0],
            'Predicted': odds_names[np.argmax(prob)], 'Odds1': odds[0], 'OddsX': odds[1], 'Odds2': odds[2],'OddsFav': odds_order[np.argmin(odds[0:3])], 'Winner': y_names[np.argmax(winner)] }
    rows_list.append(dict)

first_columns = ['OddsFav', 'Predicted','Winner', 'Prob1', 'ProbX', 'Prob2', 'Odds1', 'Odds2', 'Odds2']
df = pd.DataFrame(rows_list, columns=first_columns)

In [16]:
df

Unnamed: 0,OddsFav,Predicted,Winner,Prob1,ProbX,Prob2,Odds1,Odds2,Odds2.1
0,Home,Home,Home,0.778994,0.085812,0.135193,1.27,4.76,4.76
1,Away,Away,Away,0.104316,0.045641,0.850043,9.18,1.07,1.07
2,Home,Home,Away,0.517341,0.123738,0.358921,1.90,2.27,2.27
3,Home,Home,Home,0.936246,0.028135,0.035619,1.05,11.75,11.75
4,Home,Home,Home,0.973913,0.013300,0.012788,1.01,15.50,15.50
5,Away,Away,Away,0.300789,0.111986,0.587225,3.00,1.37,1.37
6,Away,Away,Away,0.143599,0.056040,0.800361,6.41,1.14,1.14
7,Home,Home,Away,0.666572,0.117479,0.215949,1.40,3.40,3.40
8,Home,Home,Home,0.660122,0.112365,0.227513,1.54,3.07,3.07
9,Away,Away,Draw,0.413125,0.113610,0.473266,2.44,1.69,1.69


# Compare OddsFav and Predicted

In [None]:
mask = (df.OddsFav != df.Predicted) & (df.Winner == df.OddsFav)
mask2 = (df.OddsFav != df.Predicted) & (df.Winner == df.Predicted)
OddsFavCorrect = df.loc[mask].shape[0]
PredictedCorrect = df.loc[mask2].shape[0]

print(f"Oddsfav correct: {OddsFavCorrect}\nPredicted correct: {PredictedCorrect}")

In [25]:
df = data.copy()
mask = (df.Winner == df.OddsFav)
mask2 = (df.Winner == df.Predicted)
OddsFavCorrect = df.loc[mask].shape[0]
PredictedCorrect = df.loc[mask2].shape[0]

print(f"Oddsfav correct: {OddsFavCorrect}\nPredicted correct: {PredictedCorrect}")

AttributeError: 'DataFrame' object has no attribute 'Winner'

In [40]:
df = data.copy()
df['Home'] = df['odds1']
df['Draw'] = df['oddsX']
df['Away'] = df['odds2']
df['OddsFav'] = df[['Home','Draw','Away']].idxmin(axis=1)

mask = (df.ftr == df.OddsFav)
OddsFavCorrect = df.loc[mask].shape[0]
OddsFavCorrect/df.shape[0]


0.717100803718408