**Module that creates, trains and predicts La Liga matches**

## Libraries

In [1]:
from pathlib import Path # Path manipulation
import os # OS library
import numpy as np # Vector - Matrix Library

import pandas as pd # Data import, manipulation and processing 
from datetime import datetime

from keras.models import Sequential
from keras.layers import Dense

from data_functions import * # Private library of functions related to La Liga Dataset
from data_preprocessing import * # Private library of functions related to La Liga Dataset

from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier

from sklearn.metrics import classification_report, confusion_matrix

Using TensorFlow backend.


## Variables

In [2]:
data_folder = Path("../data")
train_filename = '05-17_train.csv'
test_filename = '17-19_test.csv'
train_file_path = data_folder / train_filename
test_file_path = data_folder / test_filename

In [3]:
jornada_flag = True
jornada_val = 6

normalize_flag = True

one_hot_flag = True

In [4]:
hl_shape = 16
hl_activation = 'relu'

ol_shape = 3
ol_activation = 'softmax'

## Functions

In [5]:
def create_model(n_features):
    #create model
    model = Sequential()

    #add model layers
    model.add(Dense(hl_shape, activation=hl_activation, input_dim=n_features))
    #model.add(Dense(10, activation='relu'))
    model.add(Dense(ol_shape, activation=ol_activation))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [6]:
def train_model(model):
    pass

In [7]:
def test_model(model):
    pass

In [8]:
def create_svm():
    return OneVsRestClassifier(SVC(kernel='linear'))

## Execution

In [9]:
train = pd.read_csv(data_folder / train_filename)
test = pd.read_csv(data_folder / test_filename)

In [45]:
columns = get_readability_column_labels()
columns.append('result')


mapping = {'H' : 0, 'D' : 1 , 'A' : 2}
# Drop the rows corresponding to the first 5 jornada and drop the readability columns
if jornada_flag:
    train = train.drop(train[train['jornada'] < jornada_val].index, axis=0)
    test = test.drop(train[train['jornada'] < jornada_val].index, axis=0)

if normalize_flag: 
    train = normalize_columns(train)
    test = normalize_columns(test)
    
X_train = train.drop(columns = columns)
X_test = test.drop(columns = columns)
y_train = train['result']
y_test = test['result']

y_train = y_train.replace(mapping)
y_test = y_test.replace(mapping)

if one_hot_flag:
    y_train = pd.get_dummies(y_train)

print(y_train)

      0  1  2
50    0  1  0
51    1  0  0
52    0  1  0
53    0  1  0
54    1  0  0
...  .. .. ..
4555  0  0  1
4556  0  1  0
4557  1  0  0
4558  1  0  0
4559  0  0  1

[3961 rows x 3 columns]


In [46]:
epochs = 10
batch_size = 4
verbose = 1

In [47]:
n_features = X_train.shape[1]
model = create_model(n_features)
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 16)                592       
_________________________________________________________________
dense_6 (Dense)              (None, 3)                 51        
Total params: 643
Trainable params: 643
Non-trainable params: 0
_________________________________________________________________


In [48]:
model.fit(X_train, y_train,
         epochs=epochs,
         batch_size=batch_size,
         verbose=verbose)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7fd689a7a310>

In [49]:
Y_pred = model.predict(X_test)
Y_pred

array([[0.3617315 , 0.32693306, 0.3113354 ],
       [0.3617315 , 0.32693306, 0.3113354 ],
       [0.3617315 , 0.32693306, 0.3113354 ],
       ...,
       [0.4196522 , 0.28643987, 0.29390797],
       [0.41554376, 0.2654096 , 0.3190467 ],
       [0.46402198, 0.2110464 , 0.32493162]], dtype=float32)

In [50]:

y_pred = np.argmax(Y_pred, axis=1)
y_pred

array([0, 0, 0, ..., 0, 0, 0])

In [51]:
print(confusion_matrix(y_test, y_pred))

[[642   3 122]
 [374   3  94]
 [306   4 115]]


In [52]:
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.49      0.84      0.61       767
           1       0.30      0.01      0.01       471
           2       0.35      0.27      0.30       425

    accuracy                           0.46      1663
   macro avg       0.38      0.37      0.31      1663
weighted avg       0.40      0.46      0.36      1663

