# A simple NN for soccer match result prediction

## Data import

In [43]:
import numpy as np
import pickle as pkl
from sklearn import preprocessing

In [41]:
import torch
import torch.utils.data

In [21]:
#load the data
m_ft = pkl.load(open('M_ft.p', 'rb')) #matchs_features as dict with leagues as keys
scores = pkl.load(open('Score.p', 'rb')) #matchs_results as dict with leagues as keys

In [93]:
#concatenate the data for the different leagues
X = np.concatenate([m_ft[key] for key in m_ft])
y = np.concatenate([scores[key] for key in scores])

In [94]:
#removing the samples containing NaNs
mf_nan = np.isnan(X).sum(1)
X = X[mf_nan == 0, :]
y = y[mf_nan == 0, :]

In [95]:
#getting label vector from scores
y = 1 + np.sign(y[:,0] - y[:,1])

In [96]:
#rescaling the data
min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)

In [97]:
#split between train and test
train_prop = 0.8
train_size = int(train_prop * len(X))

X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

In [118]:
#proportions of the classes in the training set for the CrossEntropy Loss 
class_weights = [sum(y_train == i) / len(y_train) for i in range(3)]

In [119]:
#convert numpy arrays to tensors
X_train_ts, X_test_ts = torch.FloatTensor(X_train), torch.FloatTensor(X_test)
y_train_ts, y_test_ts = torch.LongTensor(y_train), torch.LongTensor(y_test)

class_weights_ts = torch.FloatTensor(class_weights)

In [356]:
pkl.dump((X_train_ts, X_test_ts, y_train_ts, y_test_ts, class_weights_ts), open('soccer_tensors.p', 'wb'))

## Model design

In [2]:
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

In [247]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(614, 50)
        self.fc2 = nn.Linear(50, 3)
        self.dropoutLayer = nn.Dropout(p = 0.5)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropoutLayer(x)
        x = self.fc2(x)
        return x


net = Net()

## Objective and optimizer setup

In [59]:
import torch.optim as optim

In [328]:
criterion = nn.CrossEntropyLoss(weight = class_weights_ts)
optimizer = optim.SGD(net.parameters(), lr=0.00005, momentum=0.9)

In [307]:
criterion = nn.CrossEntropyLoss(weight = class_weights_ts)
optimizer = optim.Adadelta(net.parameters())

## Training

In [129]:
#building datasets
trainset = torch.utils.data.TensorDataset(X_train_ts, y_train_ts)
testset = torch.utils.data.TensorDataset(X_test_ts, y_test_ts)

trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, batch_size= 256)
testloader = torch.utils.data.DataLoader(testset, shuffle=True, batch_size= 256)

In [293]:
n_epochs = 1000

In [329]:
for epoch in range(n_epochs):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.data[0]
    """    
    print('[%d] loss: %.3f' % (epoch + 1, running_loss / i+1))
    running_loss = 0.0
    """
print('Finished Training')

Finished Training


## Evaluating on the test data

In [345]:
correct = 0
total = 0
for data in testloader:
    matchs, labels = data
    outputs = net(Variable(matchs))
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()

print('Accuracy of the network: %.1f %%' % (100. * correct / total))

Accuracy of the network: 49.5 %
