In [1]:
import pandas as pd

matches = pd.read_csv("web_scraping/matches.csv", index_col=0)
matches.dtypes
# create features to predict on
matches["date"] = pd.to_datetime(matches["date"])
matches["venue_code"] = matches["venue"].astype("category").cat.codes
matches["opp_code"] = matches["opponent"].astype("category").cat.codes
matches["hour"] = matches["time"].str.replace(":.+", "", regex=True).astype("int")
matches["day_code"] = matches["date"].dt.dayofweek

matches["gf"] = matches["gf"].astype("int")
matches["ga"] = matches["ga"].astype("int")
# create label to predict
matches["target"] = (matches["result"] == "W").astype("float")

data_df = matches[["venue_code","opp_code","hour","day_code","gf","ga","target"]]

In [None]:
from FootballMatchDataset import FootballMatchDataset

dataset = FootballMatchDataset(data_df)
data_df.info()
#type(dataset[0][1])
#dataset[0][0].dtype

In [3]:
from FootballMatchDataset import FootballMatchDataset
from FootballMatchClassfier import FootballMatchClassfier
from torch.utils.data import DataLoader, random_split
import torch 
from torch.utils.tensorboard import SummaryWriter

dataset = FootballMatchDataset(data_df)
train_dataset,test_dataset,val_dataset = random_split(dataset, [0.7,0.2,0.1])

trainloader = DataLoader(train_dataset,batch_size=64,shuffle=True,drop_last=True)
valloader = DataLoader(val_dataset,batch_size=64,shuffle=True,drop_last=True)
testloader = DataLoader(test_dataset,batch_size=64,shuffle=True,drop_last=True)

model = FootballMatchClassfier(6)
optimiser = torch.optim.SGD(model.parameters(), lr=0.0001)
loss_function = torch.nn.BCELoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

epochs = 100
for i in range(epochs):    
    print(f'Epoch: {i}')
    #writer = SummaryWriter()
    batch_id = 0     
    # Set the model to run on the device
    model = model.to(device)
    model.train(True)     
    for batch in trainloader:
        # get features and labels from the batch
        features,labels = batch
        features = features.to(device)
        labels = labels.to(device, non_blocking=True)
        # loss.backward does not overwrite, it adds. To stop this, we set the gradients back to zero. sets the .grad of all optimized tensors to zero
        optimiser.zero_grad()
        # make a prediction
        prediction = model(features)
        # calculate loss
        criterion = loss_function(prediction,labels.unsqueeze(1))
        # backward function calculates the gradient of the current tensor w.r.t graph leaves
        criterion.backward()
        # moves each parameter in the opposite direction of the gradient, proportional to the learning rate
        optimiser.step()
        #writer.add_scalar('Loss', criterion.item(), batch_id)
        batch_id += 1
        # print(f'Batch: {batch_id}')
    model.eval()
    val_loss = 0.0
    val_steps = 0
    total = 0
    correct = 0
    for i, data in enumerate(valloader, 0):
        with torch.no_grad():
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            loss = loss_function(prediction,labels.unsqueeze(1))
            val_loss += loss.cpu().numpy()
            val_steps += 1            
            print (f'Loss:{val_loss / val_steps} Accuracy:{correct / total}')

Epoch: 0
Loss:0.7121776663685871 Accuracy:0.59375
Loss:0.696019531998259 Accuracy:0.578125
Loss:0.6929414001481096 Accuracy:0.609375
Epoch: 1
Loss:0.6983984144186006 Accuracy:0.765625
Loss:0.7129735095668117 Accuracy:0.6640625
Loss:0.7168746597666228 Accuracy:0.6302083333333334
Epoch: 2
Loss:0.7066284879262341 Accuracy:0.578125
Loss:0.7031218149404845 Accuracy:0.609375
Loss:0.712805901076461 Accuracy:0.6197916666666666
Epoch: 3
Loss:0.7845220086806022 Accuracy:0.53125
Loss:0.7691169446603857 Accuracy:0.578125
Loss:0.759060622835047 Accuracy:0.5885416666666666
Epoch: 4
Loss:0.8647408708469443 Accuracy:0.609375
Loss:0.776984077008599 Accuracy:0.5859375
Loss:0.7534644536623646 Accuracy:0.6197916666666666
Epoch: 5
Loss:0.8887131359665295 Accuracy:0.625
Loss:0.8520474714416484 Accuracy:0.578125
Loss:0.8363649774447784 Accuracy:0.6197916666666666
Epoch: 6
Loss:0.8327968433491971 Accuracy:0.6875
Loss:0.8734264119257504 Accuracy:0.6328125
Loss:0.9724802412014292 Accuracy:0.5833333333333334
Epo

KeyboardInterrupt: 