In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer

from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import SGDClassifier, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

from tensorflow import keras as k
import tensorflow as tf

import pandas as pd
from utility import *

In [None]:
# Columns of the dataset that are fed to the model, in the order they are
# presented to the network. The group labels below are inferred from the
# column names only -- confirm against the dataset documentation.
features = [
    # team identifiers
    'HomeTeam',
    'AwayTeam',
    # presumably Elo ratings of the two sides
    'HTeamEloScore',
    'ATeamEloScore',
    # presumably rest days before the match
    'HTdaysSinceLastMatch',
    'ATdaysSinceLastMatch',
    # overall win / draw rates
    'HTW_rate',
    'ATW_rate',
    'ATD_rate',
    'HTD_rate',
    # 7- and 12-prefixed win rates
    '7_HTW_rate',
    '12_HTW_rate',
    '7_ATW_rate',
    '12_ATW_rate',
    # 7- and 12-prefixed draw rates
    '7_HTD_rate',
    '12_HTD_rate',
    '7_ATD_rate',
    '12_ATD_rate',
    # 7- and 12-prefixed loss rates
    '7_HTL_rate',
    '12_HTL_rate',
    '7_ATL_rate',
    '12_ATL_rate',
    # 5-prefixed home-at-home / away-at-away rates
    '5_HTHW_rate',
    '5_ATAW_rate',
]

In [None]:
def load_data(path='./input/dataset.csv'):
    """Read the match dataset and return model inputs and raw labels.

    ``HomeTeam`` and ``AwayTeam`` are replaced by numeric codes from one
    shared ``OrdinalEncoder``, so a given team receives the same code in
    both columns.

    Parameters
    ----------
    path : str, optional
        CSV file to read. Defaults to ``'./input/dataset.csv'``.

    Returns
    -------
    X : pandas.DataFrame
        The columns listed in the module-level ``features`` list, passed
        through ``pd.get_dummies``.
    y : numpy.ndarray
        The ``FTR`` column as a column vector of shape ``(n_rows, 1)``.
    """
    data = pd.read_csv(path)

    # Fit on the union of both columns. Fitting on HomeTeam alone makes
    # transform() raise on any team that only ever appears as the away side.
    all_teams = pd.concat([data['HomeTeam'], data['AwayTeam']], ignore_index=True)
    enc = OrdinalEncoder().fit(all_teams.to_numpy().reshape(-1, 1))

    for col in ('HomeTeam', 'AwayTeam'):
        # transform() returns an (n, 1) array; flatten it back to one column.
        data[col] = enc.transform(data[col].to_numpy().reshape(-1, 1)).ravel()

    X = pd.get_dummies(data[features])
    # Column vector, the 2-D layout a scikit-learn encoder expects downstream.
    y = data['FTR'].to_numpy().reshape(-1, 1)
    return X, y

In [None]:
# Feature frame plus the raw result labels as an (n, 1) column vector.
X, y_raw = load_data()

# Dense one-hot targets for the softmax output layer.
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# keyword, so try the current spelling first and fall back for older releases.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)
y = enc.fit_transform(y_raw)

# Fill missing feature values using SimpleImputer's default strategy.
# NOTE(review): the imputer is fitted on every row, including the rows that a
# later cell holds out as the test set -- consider fitting on the training
# rows only to avoid leaking test statistics.
X_imputed = SimpleImputer().fit_transform(X)

In [None]:
# Seasons assigned to the training and test portions of the data.
trn_ssn = list(range(2008, 2016))
tst_ssn = [2016, 2017, 2018]

trn_ssn_len, tst_ssn_len = len(trn_ssn), len(tst_ssn)

# Held-out fraction, expressed as the share of seasons that are test seasons.
# NOTE(review): this only lines up with season boundaries if the rows are in
# chronological order and every season has a similar number of matches --
# confirm against the dataset.
test_size = tst_ssn_len / (trn_ssn_len + tst_ssn_len)

# shuffle=False keeps the row order, so the test set is the tail of the data.
x_train, x_test, y_train, y_test = train_test_split(
    X_imputed, y, test_size=test_size, shuffle=False
)

# Append a trailing axis of length 1 so the arrays are 3-D, as the LSTM layer
# requires: (samples, n_features, 1).
# NOTE(review): `np` is not imported explicitly in this notebook; presumably
# it arrives through `from utility import *` -- confirm.
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

for split in (x_train, x_test):
    print(split.shape)

In [None]:
# Small recurrent classifier: one LSTM layer, batch normalisation, then a
# softmax over the one-hot result classes.
lstm = k.models.Sequential([
    k.layers.LSTM(10, activation='relu'),
    k.layers.BatchNormalization(),
    # One output unit per one-hot label column (3 for the current targets).
    k.layers.Dense(y_train.shape[1], activation='softmax')
])

# Create the weights from the input shape alone. Calling the model on the
# whole of x_train would run a full forward pass in a single batch only to
# infer that shape.
lstm.build(input_shape=(None,) + tuple(x_train.shape[1:]))
lstm.summary()

In [None]:
# Training setup: Adam with a small step size, cross-entropy on the one-hot
# targets, categorical accuracy as the reported metric.
lstm.compile(
    optimizer=k.optimizers.Adam(0.0001),
    loss='categorical_crossentropy',
    metrics=["categorical_accuracy"],
)

# Stop once the monitored loss has gone 5 epochs without improving.
# NOTE(review): 'loss' is the training loss and no validation data is passed
# to fit(), so this does not guard against overfitting -- confirm intended.
callback = k.callbacks.EarlyStopping(monitor='loss', patience=5)

# To reuse a previously saved network instead of retraining:
#   lstm = k.models.load_model('./models/lstm_senza_squadre')
lstm.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=500,
    callbacks=[callback],
)


In [None]:
# Persist the trained network so later sessions can reload it without retraining.
# NOTE(review): the path has no file extension; newer Keras releases appear to
# require a `.keras` or `.h5` suffix -- confirm against the installed version.
model_path = './models/lstm_senza_squadre'
lstm.save(model_path)

In [None]:
# Bundle each split so the same (inputs, targets) pair is passed everywhere.
train_split = (x_train, y_train)
test_split = (x_test, y_test)

# Loss and compiled metric on the held-out rows.
lstm.evaluate(*test_split)

# `report` comes from the project's `utility` module; it is given the fitted
# label encoder `enc` along with each split and its name.
report(lstm, *train_split, 'train', enc)
report(lstm, *test_split, 'test', enc)