In [238]:
from argparse import ArgumentParser
from fastprogress.fastprogress import master_bar, progress_bar
import torch
import pandas as pd
import os
import numpy as np

In [None]:
def train_one_epoch(dataloader, model, criterion, optimizer, device, mb):
    """Run a single optimization pass over every batch of ``dataloader``.

    Args:
        dataloader: Source of batches; it is wrapped in ``DataLoaderProgress``
            before being iterated.
        model: Network to optimize; it is switched to training mode first.
        criterion: Callable mapping ``(output, targets)`` to a loss on which
            ``backward()`` is called.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` are invoked
            once per batch.
        device: Device every batch is moved to before the forward pass.
        mb: Parent master bar handed to ``progress_bar``.
    """
    model.train()

    # NOTE(review): DataLoaderProgress is not defined in the visible cells; the
    # unpacking below expects it to yield (something, (inputs, targets)) pairs.
    batches = progress_bar(DataLoaderProgress(dataloader), parent=mb)

    for _, (inputs, targets) in batches:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass and loss for this batch.
        predictions = model(inputs)
        batch_loss = criterion(predictions, targets)

        # Clear stale gradients, backpropagate, then update the parameters.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

In [None]:
def validate(dataloader, model, criterion, device, epoch, num_epochs, mb):
    """Evaluate ``model`` on ``dataloader`` and report accuracy and mean loss.

    The model is put into evaluation mode and every batch is scored with
    gradient tracking disabled. The reported loss is the average of the
    per-batch criterion values; the accuracy is the number of rows whose
    ``argmax(1)`` equals the target divided by ``len(dataloader.dataset)``.

    Args:
        dataloader: Iterable of ``(X, Y)`` batches that supports ``len()`` and
            exposes a ``dataset`` attribute supporting ``len()``.
        model: Network to evaluate.
        criterion: Callable mapping ``(output, Y)`` to a loss with ``.item()``.
        device: Device each batch is moved to.
        epoch: Epoch number used in the message; ``0`` is labelled "Initial".
        num_epochs: Total number of epochs, shown in the message.
        mb: Object whose ``write`` method receives the summary line.
    """
    model.eval()

    sample_count = len(dataloader.dataset)
    batch_count = len(dataloader)

    running_loss = 0
    hits = 0

    # No gradients are needed while scoring.
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            predictions = model(inputs)

            running_loss += criterion(predictions, targets).item()

            # Count the rows whose highest-scoring class matches the target.
            matches = predictions.argmax(1) == targets
            hits += matches.type(torch.float).sum().item()

    loss = running_loss / batch_count
    accuracy = hits / sample_count

    if epoch == 0:
        message = "Initial"
    else:
        message = f"Epoch {epoch:>2}/{num_epochs}:"
    message += f" accuracy={100*accuracy:5.2f}%"
    message += f" and loss={loss:.3f}"
    mb.write(message)

In [None]:
def train(model, criterion, optimizer, train_loader, valid_loader, device, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs, validating around each one.

    A validation pass is reported before any training (labelled as epoch 0),
    then each epoch runs ``train_one_epoch`` followed by ``validate``.

    Args:
        model: Network to train.
        criterion: Loss callable forwarded to training and validation.
        optimizer: Optimizer forwarded to ``train_one_epoch``.
        train_loader: Batches used for optimization.
        valid_loader: Batches used for the validation reports.
        device: Device forwarded to training and validation.
        num_epochs: Number of epochs to run.
    """
    epoch_bar = master_bar(range(num_epochs))

    # Baseline metrics before the first update.
    validate(valid_loader, model, criterion, device, 0, num_epochs, epoch_bar)

    for epoch_index in epoch_bar:
        # Reports use 1-based epoch numbers.
        epoch_number = epoch_index + 1
        train_one_epoch(train_loader, model, criterion, optimizer, device, epoch_bar)
        validate(valid_loader, model, criterion, device, epoch_number, num_epochs, epoch_bar)

In [239]:
def main():
    """Command-line entry point: parse options, pick a device, and start training.

    NOTE(review): as written, ``model``, ``train_loader`` and ``valid_loader``
    are never assigned in this function (the lines that would create them are
    commented out), so the optimizer construction and the ``train`` call raise
    ``NameError`` unless those names exist as globals.
    """

    # NOTE(review): the first positional parameter of ArgumentParser is `prog`,
    # so this text is used as the program name rather than as the description.
    aparser = ArgumentParser("FIFAI--Train a neural network to predict EPL scorelines.")
    aparser.add_argument("epl_data", type=str, help="Path to store/find the EPL games dataset")
    aparser.add_argument("--num_epochs", type=int, default=10)
    aparser.add_argument("--batch_size", type=int, default=128)
    aparser.add_argument("--learning_rate", type=float, default=0.01)
    aparser.add_argument("--momentum", type=float, default=0.9)
    aparser.add_argument("--gpu", action="store_true")

    args = aparser.parse_args()

    # Use the GPU only if it was requested and CUDA is available; otherwise CPU.
    device = "cuda" if args.gpu and torch.cuda.is_available() else "cpu"


    # Data loaders (disabled).
    # NOTE(review): get_epl_data_loaders is not defined in the visible cells.
    # train_loader, valid_loader = get_epl_data_loaders(args.epl_data, args.batch_size, 0)


    # Model (disabled).
    # NOTE(review): in_features is still a placeholder ("?") and must be set to
    # the real input width before this line can be enabled.
    # model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(in_features=?, out_features=10),)

    # Cross-entropy loss, optimized with SGD using the learning rate and
    # momentum taken from the command-line options.
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum)

    train(model, criterion, optimizer, train_loader, valid_loader, device, args.num_epochs)

In [240]:
# Number of seasons covered and games each team plays per season, as set in
# this notebook.
num_seasons = 5
games_per_season_per_team = 38

# Games a single team plays across all covered seasons.
total_games_per_team = num_seasons * games_per_season_per_team


def load_data(path="../teams_five_season_data", filename="Arsenal16_21.csv"):
    """Read one team's results CSV and return it as a DataFrame.

    The previous version printed ``len(x_axis)`` although the assignment of
    ``x_axis`` was commented out, so every call ended in ``NameError``, and
    the parsed data was never handed back to the caller. The leftover debug
    print is gone and the frame is returned instead.

    Args:
        path: Directory containing the CSV files. Defaults to the directory
            this notebook originally hard-coded.
        filename: Name of the CSV file inside ``path``. Defaults to the file
            this notebook originally hard-coded.

    Returns:
        The ``pandas.DataFrame`` produced by ``pandas.read_csv``.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
    """
    return pd.read_csv(os.path.join(path, filename))


In [342]:
# NOTE(review): `Team` is defined in a cell further down this notebook, so on a
# fresh kernel that cell (and the one defining `encode_string_as_int`, which
# `Team.__init__` calls) must be executed before this line.
t1 = Team("../teams_five_season_data/Arsenal16_21.csv")

training data:
[[6511411510111097108, 76105118101114112111111108, 3, 4, 0, 0], [7610110599101115116101114, 6511411510111097108, 0, 0, 0, 0], [8797116102111114100, 6511411510111097108, 1, 3, 1, 0], [6511411510111097108, 8311111711610497109112116111110, 2, 1, 2, 0], [72117108108, 6511411510111097108, 1, 4, 3, 1], [6511411510111097108, 6710410110811510197, 3, 0, 4, 1], [66117114110108101121, 6511411510111097108, 0, 1, 5, 1], [6511411510111097108, 831199711011510197, 3, 2, 6, 1], [6511411510111097108, 7710510010010810111598114111117103104, 0, 0, 0, 0], [8311711010010111410897110100, 6511411510111097108, 1, 4, 1, 0], [6511411510111097108, 8411111611610111010497109, 1, 1, 0, 0], [77971103285110105116101100, 6511411510111097108, 1, 1, 0, 0], [6511411510111097108, 66111117114110101109111117116104, 3, 1, 1, 0], [87101115116327297109, 6511411510111097108, 1, 5, 2, 0], [6511411510111097108, 83116111107101, 3, 1, 3, 1], [69118101114116111110, 6511411510111097108, 2, 1, 0, 0], [77971103267105116121

In [340]:
class Team:
    """Match data for one team, loaded from a CSV file into tensors.

    ``X_train`` holds one row of numeric features per match and ``Y_train``
    holds the corresponding target value; both are ``torch.double`` tensors.
    Instances support ``len()`` and indexing, so they can be consumed as a
    map-style dataset.
    """

    def __init__(self, file_path):
        """Read ``file_path`` and build the feature and target tensors.

        Args:
            file_path: Path of the CSV file, handed to ``pandas.read_csv``.
        """
        file_out = pd.read_csv(file_path)

        # Keep rows 1..190 and columns 1..7 of the parsed frame.
        # NOTE(review): presumably row 0 and column 0 are not match data and
        # 190 = 38 games x 5 seasons - confirm against the CSV layout.
        data = file_out.iloc[1:191, 1:8].values

        x = []
        y = []
        for match in data:
            fields = match.tolist()

            # The first two fields are team names; turn them into integers.
            fields[0] = encode_string_as_int(fields[0])
            fields[1] = encode_string_as_int(fields[1])

            # Field 4 is the target; the remaining fields are the features.
            target = fields.pop(4)
            x.append([int(value) for value in fields])
            y.append(int(target))

        # NOTE(review): the encoded team names are far larger than 2**53, so
        # they are rounded when stored as double - confirm this is acceptable.
        self.X_train = torch.tensor(x, dtype=torch.double)
        self.Y_train = torch.tensor(y, dtype=torch.double)

    def __len__(self):
        """Return the number of matches held by this instance."""
        return len(self.Y_train)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at ``idx``."""
        return self.X_train[idx], self.Y_train[idx]
        

In [324]:
def encode_string_as_int(string):
    """Turn a string into one integer built from its characters' code points.

    Each character's ``ord`` value is written in decimal and the pieces are
    concatenated in order, e.g. ``"Hull"`` becomes ``72117108108``. It is
    used in this notebook to convert team names into numbers. An empty
    string raises ``ValueError`` because there are no digits to parse.
    """
    digits = "".join(str(ord(char)) for char in string)
    return int(digits)