In [12]:
import pandas as pd

matches = pd.read_csv("../web_scraping/matches.csv", index_col=0)
matches.dtypes
# create features to predict on
matches["date"] = pd.to_datetime(matches["date"])
matches["venue_code"] = matches["venue"].astype("category").cat.codes
matches["opp_code"] = matches["opponent"].astype("category").cat.codes
matches["hour"] = matches["time"].str.replace(":.+", "", regex=True).astype("int")
matches["day_code"] = matches["date"].dt.dayofweek

matches["gf"] = matches["gf"].astype("int")
matches["ga"] = matches["ga"].astype("int")
# create label to predict
matches["target"] = (matches["result"] == "W").astype("float")

data_df = matches[["date","team", "opponent", "gf","ga","target"]]
#print(data_df.groupby("team")["gf"].rolling(window=5).mean().reset_index(0, drop=True))

data_df["avg_gf"] = data_df.groupby("team")["gf"].rolling(window=5).mean().reset_index(0,drop=True)
#data_df["avg_ga"] = data_df.groupby("team")["ga"].rolling(window=5).mean().reset_index(0,drop=True)
#data_df
#matches.head()

ValueError: Cannot set a DataFrame with multiple columns to the single column avg_gf

In [None]:
from FootballMatchDataset import FootballMatchDataset
from match_prediction_model.FootballMatchPredictionModel import FootballMatchPredictionModel
from torch.utils.data import DataLoader, random_split
import torch 
from torch.utils.tensorboard import SummaryWriter

dataset = FootballMatchDataset(data_df)
train_dataset,test_dataset,val_dataset = random_split(dataset, [0.7,0.2,0.1])

trainloader = DataLoader(train_dataset,batch_size=64,shuffle=True,drop_last=True)
valloader = DataLoader(val_dataset,batch_size=64,shuffle=True,drop_last=True)
testloader = DataLoader(test_dataset,batch_size=64,shuffle=True,drop_last=True)

model = FootballMatchPredictionModel(4)
optimiser = torch.optim.SGD(model.parameters(), lr=0.0001)
loss_function = torch.nn.BCELoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

epochs = 100
writer = SummaryWriter()
for i in range(epochs):    
    print(f'Epoch: {i}')
    batch_id = 0     
    # Set the model to run on the device
    model = model.to(device)
    model.train(True)     
    for batch in trainloader:
        # get features and labels from the batch
        features,labels = batch
        features = features.to(device)
        labels = labels.to(device, non_blocking=True)
        # loss.backward does not overwrite, it adds. To stop this, we set the gradients back to zero. sets the .grad of all optimized tensors to zero
        optimiser.zero_grad()
        # make a prediction
        prediction = model(features)
        # calculate loss
        criterion = loss_function(prediction,labels.unsqueeze(1))
        # backward function calculates the gradient of the current tensor w.r.t graph leaves
        criterion.backward()
        # moves each parameter in the opposite direction of the gradient, proportional to the learning rate
        optimiser.step()
        batch_id += 1
    #     # print(f'Batch: {batch_id}')
    # writer.add_scalar('Loss', criterion.item(), i)    
    model.eval()
    val_loss = 0.0
    val_steps = 0
    total = 0
    correct = 0
    for data in valloader:
        with torch.no_grad():
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            loss = loss_function(prediction,labels.unsqueeze(1))
            val_loss += loss.cpu().numpy()
            val_steps += 1     
               
    print (f'Loss:{val_loss / val_steps} Accuracy:{correct / total}')
    
    writer.add_scalar('AVG Loss', val_loss / val_steps, i)       
    writer.add_scalar('Accuracy', correct / total, i)   
    