In [215]:
import sys
import torch
from random import randint
from torch import nn
from torch.nn import functional as F
from tqdm import tqdm
from IPython.display import clear_output

# Find a way to deal with this better (importing utils)
sys.path.append('../')
from utils.util import MatchData

# Pick the compute device: the GPU when PyTorch can see one, otherwise the CPU.
# `device` is kept as a plain string so it can be handed to `.to(...)` later on.
if torch.cuda.is_available():
    device = "cuda"
    print(f'Using cuda device {torch.cuda.get_device_name()}')
else:
    device = "cpu"
    print(f"Using {device} device")

Using cuda device NVIDIA GeForce RTX 3060 Laptop GPU


In [216]:
# Load data
# The path is relative ('../../...'), so it resolves against the notebook's
# working directory.
load_path = '../../data/processed/processed_match_data_euw1_1667433600_1668520576.csv'
# MatchData comes from the project's utils.util module; it is handed the
# device string chosen above.
data = MatchData(load_path, device = device, data_format = 'champ')
# Fixed seed so the split is the same on every run.
# NOTE(review): split_size=0.8 presumably means 80% train / 20% test - confirm
# against MatchData.test_train_split.
(X_tr, Y_tr), (X_te, Y_te) = data.test_train_split(split_size=0.8, seed=42)
# NOTE(review): these batches are what the training loop optimises on;
# presumably they are drawn from the training split only - verify in
# MatchData.create_mini_batches, otherwise test rows leak into training.
batches = data.create_mini_batches(batch_size=32)

In [217]:
# MLP nn.Module

class MLP(nn.Module):
    """Embedding + fully connected network that outputs one probability per sample.

    Each input row holds 10 integer ids. Every id is looked up in a shared
    embedding table, the 10 embeddings are concatenated, passed through a
    stack of Linear/ReLU layers and finally squashed by a sigmoid.

    Args:
        data: Object exposing ``num_champs``, the size of the embedding table.
        embed_dim: Width of each embedding vector.
        hidden_size: Number of units in every hidden Linear layer.
        hidden_depth: Number of hidden Linear layers (at least one is always built).
        dropout_p: Accepted for interface compatibility; no Dropout layer is
            currently added to the network, so this value has no effect.
    """

    def __init__(self, data:MatchData, embed_dim:int, hidden_size:int, hidden_depth:int, dropout_p):
        super().__init__()

        self.embed = nn.Embedding(data.num_champs, embed_dim)
        self.flatten = nn.Flatten()

        # Input widths of the hidden Linear layers: the first one consumes the
        # 10 concatenated embeddings, the remaining ones are square.
        in_widths = [embed_dim * 10] + [hidden_size] * (hidden_depth - 1)
        stack = []
        for in_features in in_widths:
            # Every Linear is immediately followed by its ReLU.
            stack += [nn.Linear(in_features, hidden_size), nn.ReLU()]
        self.hidden = nn.Sequential(*stack)

        self.out = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a (batch, 10) tensor of integer ids to (batch, 1) probabilities."""
        features = self.flatten(self.embed(x))
        return self.sigmoid(self.out(self.hidden(features)))

In [219]:
# Hyper parameters & initialisation

# -- Network shape --
embed_size = 5       # width of each embedding vector
hidden_size = 100    # units per hidden Linear layer
hidden_depth = 2     # number of hidden Linear layers
dropout_p = 0.5      # forwarded to MLP, which does not currently apply dropout

# -- Optimisation --
learning_rate = 1e-3

# Model: built on the CPU, then moved to the selected device
model = MLP(
    data=data,
    embed_dim=embed_size,
    hidden_size=hidden_size,
    hidden_depth=hidden_depth,
    dropout_p=dropout_p,
).to(device)
print(f'{sum(p.numel() for p in model.parameters() if p.requires_grad)} parameters.')
print(model)

# Binary cross-entropy on the model's sigmoid outputs, optimised with Adam
loss_fn = nn.BCELoss()
optimiser = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

16116 parameters.
MLP(
  (embed): Embedding(163, 5)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (hidden): Sequential(
    (0): Linear(in_features=50, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): ReLU()
  )
  (out): Linear(in_features=100, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [220]:
# Train functions

# Row count of the first mini-batch's input tensor
batch_size = batches[0][0].size(0)

@torch.no_grad()
def test(x, y):
    """Evaluate the global ``model`` on ``(x, y)`` without tracking gradients.

    Args:
        x: Input tensor accepted by ``model``.
        y: Target tensor; passed to ``loss_fn`` together with the predictions
            and compared element-wise against the rounded predictions.
            # assumes y has the same shape as the model output - TODO confirm

    Returns:
        ``(loss, accuracy)`` as Python floats, each rounded to 4 decimals.
        Accuracy is the number of rounded predictions equal to ``y`` divided
        by ``y.shape[0]``.

    Side effects:
        Switches ``model`` to eval mode and leaves it there.
    """

    model.eval()
    # Call the module itself rather than .forward() so that any registered
    # forward hooks run, as recommended by the nn.Module documentation.
    y_pred = model(x)
    loss = round(loss_fn(y_pred, y).item(), 4)
    n_correct = (y_pred.round() == y).sum().item()
    accuracy = round(n_correct / y.shape[0], 4)

    return loss, accuracy

# Terribly inefficient, just for testing purposes
def shuffle_data(batches_, team_size=5):
    """Randomly permute the columns inside each team, independently per row.

    Columns ``[0, team_size)`` are permuted among themselves and columns
    ``[team_size, 2 * team_size)`` among themselves; any further columns are
    passed through unchanged. Labels are returned untouched.

    Unlike a version relying on a fixed global batch size, this reads the row
    count and device from each batch, so every batch (including a smaller
    final one) is processed, and the input tensors are not modified.

    Args:
        batches_: Iterable of ``(X_b, Y_b)`` pairs, ``X_b`` being a 2-D tensor
            with at least ``2 * team_size`` columns.
        team_size: Number of columns per team (default 5).

    Returns:
        A new list of ``(X_shuffled, Y_b)`` pairs, one per input batch.
    """
    new_batches_ = []
    for X_b, Y_b in batches_:
        n_rows = X_b.shape[0]
        # argsort of uniform noise yields an independent random permutation
        # of range(team_size) for every row.
        perm_first = torch.argsort(
            torch.rand(n_rows, team_size, device=X_b.device), dim=-1)
        perm_second = torch.argsort(
            torch.rand(n_rows, team_size, device=X_b.device), dim=-1)

        first = torch.gather(X_b[:, :team_size], dim=-1, index=perm_first)
        second = torch.gather(
            X_b[:, team_size:2 * team_size], dim=-1, index=perm_second)
        rest = X_b[:, 2 * team_size:]  # empty when X_b has exactly 2 * team_size columns

        # Build a fresh tensor instead of writing into X_b, so the caller's
        # batches (and any tensors they are views of) stay intact.
        new_batches_.append((torch.cat((first, second, rest), dim=-1), Y_b))
    return new_batches_


def train(n_epochs, batches):
    """Train the global ``model`` for ``n_epochs`` passes over ``batches``.

    After each pass the model is evaluated on the global train split
    (``X_tr``, ``Y_tr``) and test split (``X_te``, ``Y_te``) and the metrics
    are printed, replacing the previous epoch's cell output.

    The evaluation happens *after* the epoch's optimisation pass, so the
    "Epoch k / n complete" header describes a model that has really seen k
    passes, and the metrics of the final epoch remain visible once the loop
    ends.

    Args:
        n_epochs: Number of passes over ``batches``.
        batches: Sequence of ``(X_b, Y_b)`` mini-batches given to ``train_epoch``.
    """
    for epoch in range(n_epochs):
        train_epoch(batches)
        #batches = shuffle_data(batches) # Just for testing purposes

        train_loss, train_acc = test(X_tr, Y_tr)
        test_loss, test_acc = test(X_te, Y_te)

        # wait=True defers the clear until the prints below produce new
        # output, so the cell does not flicker between epochs.
        clear_output(wait=True) # Delete cell outputs
        print(f'Epoch {epoch+1} / {n_epochs} complete:')
        print(f'Train loss = {train_loss}. Train accuracy = {train_acc}.')
        print(f'Test loss = {test_loss}. Test accuracy = {test_acc}.')


def train_epoch(batches_):
    """Run one optimisation pass of the global ``model`` over ``batches_``.

    For every ``(X_b, Y_b)`` pair the predictions are computed, the loss from
    the global ``loss_fn`` is back-propagated and the global ``optimiser``
    takes one step. A tqdm bar reports progress.

    Args:
        batches_: Iterable of ``(X_b, Y_b)`` mini-batches. The passed-in
            batches are the ones trained on (not the notebook-level
            ``batches`` variable).

    Side effects:
        Switches ``model`` to train mode and updates its parameters.
    """
    model.train()
    for X_b, Y_b in tqdm(batches_):
        # Call the module itself rather than .forward() so hooks are honoured.
        Y_pred = model(X_b)
        loss = loss_fn(Y_pred, Y_b)

        optimiser.zero_grad()
        loss.backward()
        optimiser.step()


In [221]:
# Kick off training: 100 passes over the mini-batches built above
train(n_epochs=100, batches=batches)

Epoch 39 / 100 complete:
Train loss = 0.6615. Train accuracy = 0.591.
Test loss = 0.7409. Test accuracy = 0.5114.


  6%|▌         | 23/375 [00:00<00:01, 193.33it/s]


KeyboardInterrupt: 