In [1]:
from os import path

import numpy as np
import pandas as pd
import pickle
import time
import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset, DataLoader, BatchSampler, Sampler
from torch import nn
import torchvision
from torchvision import datasets, transforms, models
from datetime import datetime 
import math

from torch.utils.tensorboard import SummaryWriter

### DATALOADER

The official dataloader from pytorch's official site uses the DataSet Class to load data via the __getitem__ method. The documentation uses numpy as well. I tried finding examples for torch file loaders but it doesn't seem to exist. __getitem__ uses indexes to load data from a csv file (in my example it's train_clean.csv and test_clean.csv. The game and face files are linked in the csv and are actually stored in the processed folder. The same method puts all the video files and their labels in a dictionary and returns it. The method __process_labels__ reads all the labels in the csv files and puts it into 3 different numpy array. 

The returned object of __getitem__ is: 
sample = {"face": face_data, "game": game_data, "valence": valence_labels, "arousal labels": arousal_labels, "game event labels": game_event_labels}

Since the data has been normalized already there is no need to further transform the data

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

class StreamLolGame(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, npz_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        data = np.load(npz_file, mmap_mode='r')
        self.game_data = torch.from_numpy(data["data"][:,:,:,:])
        #self.valence_labels = torch.from_numpy(data["valence_labels"])
        #self.arousal_labels = torch.from_numpy(data["arousal_labels"])
        self.game_event_labels = torch.from_numpy(data["game_event_labels"])
        
        del data
        self.root_dir = root_dir
        
        ## Because the image data is taken from short videos, images from the same video look almost the same, so data augmentation is a must to improve the model: 
        self.transform = transform
        
        self.game_data = self.game_data.permute(0, 3, 1, 2)
        
        if self.transform:
            self.game_data = self.transform(self.game_data)
        

    def __len__(self):
        return self.game_data.shape[0]

    def __getitem__(self, idx):        
        
        if isinstance(idx, list) == False:
            idx = [idx]

        sample = {"data": self.game_data[idx, :, :, :], "game_event_labels": self.game_event_labels[idx, :]}
                  
        return sample
    
    @staticmethod
    def __process_labels__(raw_y_data):
        """Takes raw labels and seperates them into the correct 'heads'

        :param raw_y_data: Raw inputd ata
        :return: A tuple of arrays, where each array contains all lables for a given output
        """
        valence = np.array([raw_y_data['V_Neg'], raw_y_data['V_Neut'], raw_y_data['V_Pos']])
        arousal = np.array([raw_y_data['A_Neut'], raw_y_data['A_Pos']])

        laning = raw_y_data['Laning']
        shopping = raw_y_data['Shopping']
        returning = raw_y_data['Returning']
        roaming = raw_y_data['Roaming']
        fighting = raw_y_data['Fighting']
        pushing = raw_y_data['Pushing']
        defending = raw_y_data['Defending']
        dead = raw_y_data['Dead']

        game_events = np.array([laning, shopping, returning, roaming, fighting, pushing, defending, dead])
        return valence, arousal, game_events

class AddGaussianNoise(object):
    def __init__(self, mean=0., std=1.):
        self.std = std
        self.mean = mean
        
    def __call__(self, tensor):
        return tensor + torch.randn(tensor.size()) * self.std + self.mean
    
    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)

Using cuda device


### Model

for the model I use a typical CNN setup (Convolution - relu - maxpool - convolution - relu - 3 fully connected layers). For both of my convolutional layers I used 16 filters, 3x3 kernels, a stride of 1x1 and 0 padding. The pooling layer was 4x4. The 3 fully connected layers have input sizes 13456, 120 and 32. The output is a multi-class, single label output predicting the game state. The loss function is calculated using cross entropy.

In [3]:
#out = ((Input width - filter size + 2* padding) / Stride) + 1
#dropout = 0 is shortcutted
class CNN(nn.Module):
    def __init__(self, img_shape=128, label_shape=8, channel_numbers=[3,16], number_of_c_layers = 2, number_of_filters=[16,16], 
                 kernel_sizes=[3,3], strides=[1,1], paddings=[0,0], fclayout = [120, 32], maxpool = [4, 4], device = "cuda", BN=False, dropout_rate = 0):
        super(CNN, self).__init__()
        
        self.device = device
        self.channel_number = channel_numbers
        self.number_of_filters = number_of_filters
        self.kernel_sizes = kernel_sizes
        self.strides = strides
        self.paddings = paddings
        self.maxpool = maxpool
        self.fclayout = fclayout
        self.fcs = []
        self.label_shape = label_shape
        self.img_shape = img_shape
        self.dropout_rate = dropout_rate
            
        self.net = nn.Sequential()
        
        for i in range(number_of_c_layers): 
            if i == number_of_c_layers-1:
                self.net.add_module("conv2-" + str(i), nn.Conv2d(channel_numbers[i], number_of_filters[i], kernel_sizes[i], stride=strides[i], padding=paddings[i]))
                if BN:
                    self.net.add_module("BN-2d-" + str(i), nn.BatchNorm2d(number_of_filters[i]))
                self.net.add_module("relu-conv-" + str(i), nn.ReLU())
            else:
                self.net.add_module("conv2-" + str(i), nn.Conv2d(channel_numbers[i], number_of_filters[i], kernel_sizes[i], stride=strides[i], padding=paddings[i]))
                if BN:
                    self.net.add_module("BN-2d-" + str(i), nn.BatchNorm2d(number_of_filters[i]))
                self.net.add_module("relu-conv-" + str(i), nn.ReLU())
                self.net.add_module("maxpool-" + str(i), nn.MaxPool2d(maxpool[i], maxpool[i]))
        
        self.net.add_module("flatten1", nn.Flatten())
        
        fc_down = self.net(torch.rand(1, channel_numbers[0], img_shape, img_shape)).size()[1]
        print(fc_down)
        
        self.net.add_module("fc-0", nn.Linear(fc_down, self.fclayout[0]))
        if BN:
            print("BN3: ", self.fclayout[0])
            self.net.add_module("BN-1d-0", nn.BatchNorm1d(self.fclayout[0]))
        self.net.add_module("Drop-0", nn.Dropout(dropout_rate))
        self.net.add_module("relu-fc-0", nn.ReLU())
        
        for i in range(len(fclayout)):
            if i == len(self.fclayout)-1:
                self.net.add_module("fc-" + str(i+1), nn.Linear(fclayout[i], self.label_shape))
            else:
                self.net.add_module("fc-" + str(i+1), nn.Linear(fclayout[i], fclayout[i+1]))
                if BN:
                    print("BN: " + str(i+1), fclayout[i+1])
                    self.net.add_module("BN-1d-" + str(i+1), nn.BatchNorm1d(fclayout[i+1]))
                self.net.add_module("Drop-" + str(i+1), nn.Dropout(dropout_rate))
                self.net.add_module("relu-fc-" + str(i+1), nn.ReLU())
    
    def forward(self, x):
        x = self.net(x)
        return x

def weights_init(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight.data)

#### Prepare Params and load data

##### What Metrics do we want to look at?

The metrics for my model to be considered are the accuracy and the total cross entropy loss for all batches per epoch. This total cross entropy loss would tell us more information about the speed at which the model is learning, telling us about the loss over all batches per epoch. During training the accuracy of the validaiton set is much more interesting than the accuracy of the training set, since the validation set is applying the model to "new" data. 

-----------------

The model uses xavier weight initialization for every hidden layer. The model wouldn't train at all without proper weight initialization.   

In [4]:
%%time

start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

#----------------------------------
# Generators

#transform leads to full memory
#transform=transforms.Compose([AddGaussianNoise(0., 0.2)
                            #transforms.RandomPerspective(),
                            #transforms.RandomAdjustSharpness(0.8),
                            #transforms.RandomAutocontrast()
                            #])

trainset = StreamLolGame("data_game.npz", "") #, transform = transform)

Wall time: 25.1 s


#### Start Training

In [5]:
#model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=False)
#model.add_module("fc", nn.Linear(512, 8))

def training(trainingloader, max_epochs, optimizer, model, validationloader=None, run_name="", l1_lambda = 0):
    
    scaler = torch.cuda.amp.GradScaler()
    now = datetime.now()

    current_time = now.strftime("%H_%M_%S")

    writer = SummaryWriter("runs/" + run_name)
    
    metrics_tracker = pd.DataFrame(columns=['epoch', 'train_loss', 'train_accuracy','val_loss', 'val_accuracy'])
    epochs_list = []
    train_loss_list = []
    train_accuracy_list = []
    val_loss_list = []
    val_accuracy_list = []
    
    start.record()
    for epochs in range(max_epochs):
        # Training
        correct = 0
        total_labels = 0
        total_loss = 0
        for batch_idx, batch in enumerate(trainingloader):
            optimizer.zero_grad()

            #batch["data"][: (all items in current batch) , 0 (first item in list) , : all channels, : (all pixels h), : (all pixels w)] 
            batch_input = batch["data"][:, 0, :, :, :].to(device).float()

            batch_game_event = batch["game_event_labels"][:, 0, :].to(device).long()

            prediction = model.forward(batch_input)

            argmax_predicton = torch.argmax(prediction, dim = 1)
            argmax_game_event = torch.argmax(batch_game_event, dim = 1)

            with torch.cuda.amp.autocast():
                loss = nn.functional.cross_entropy(prediction, argmax_game_event)
            
            if l1_lambda != 0:
                l1_norm = sum(p.abs().sum() for p in model.parameters())

                loss = loss + l1_lambda * l1_norm

            #loss gets updated after each batch, so a total loss is better to see if model is improving
            total_loss += loss

            total_labels += batch_game_event.size(0)
            correct += (argmax_predicton == argmax_game_event).sum().item()
            
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        print(f'Epoch: [{epochs+1}/{max_epochs}], total train (sum of) Loss: {total_loss}, Train Acc: {correct} / {total_labels}')
        writer.add_scalar('Loss/train', total_loss, epochs)
        writer.add_scalar('Accuracy/train', correct/total_labels, epochs)
        epochs_list.append(epochs+1)
        train_loss_list.append(total_loss.cpu().detach())
        train_accuracy_list.append(correct/total_labels)
        
        if validationloader:
            correct = 0
            total_labels = 0
            total_loss = 0
            model.eval()  # handle drop-out/batch norm layers
            loss = 0
            with torch.no_grad():
                for batch_idx, batch in enumerate(validationloader):

                    #batch["face"][: (all items in current batch) , 0 (first item in list) , 1 (first frame), : (all pixels h), : (all pixels w)]
                    batch_input = batch["data"][:, 0, :, :, :].to(device).float()

                    batch_game_event = batch["game_event_labels"][:, 0, :].to(device).long()
                    
                    argmax_game_event = torch.argmax(batch_game_event, dim = 1)
                    
                    prediction = model(batch_input)  # only forward pass - NO gradients!!
                    loss += nn.functional.cross_entropy(prediction, argmax_game_event)
                    
                    argmax_predicton = torch.argmax(prediction, dim = 1)
                    argmax_game_event = torch.argmax(batch_game_event, dim = 1)
                    
                    total_labels += batch_game_event.size(0)
                    correct += (argmax_predicton == argmax_game_event).sum().item()
                    
                # total loss - divide by number of batches
                val_loss = loss / len(validationloader)
                
                print(f'Epoch: [{epochs+1}/{max_epochs}], total validation (sum of) Loss: {val_loss}, Validation Acc: {correct} / {total_labels}')
                writer.add_scalar('Loss/validation', val_loss, epochs)
                writer.add_scalar('Accuracy/validation', correct/total_labels, epochs)
                val_loss_list.append(val_loss.cpu().detach())
                val_accuracy_list.append(correct/total_labels)
        
    metrics_tracker = [{'epoch': epochs_list[i], 'train_loss': train_loss_list[i].item(), 'train_accuracy': train_accuracy_list[i], 'val_loss': val_loss_list[i].item(), 'val_accuracy': val_accuracy_list[i]} for i in range(max_epochs)]
    
    end.record()
    # Waits for everything to finish running
    torch.cuda.synchronize()

    print(start.elapsed_time(end)/1000/60, "min")
    torch.cuda.empty_cache()
    return pd.DataFrame(metrics_tracker)

In [8]:
## Aufgabe 3
# SGD ohne Reg ohne BN
# 3 verschiedene LR und Batch Grössen
# Verschiedene Anzahl Filter, Kernel Grössen Stride Padding
# Plot alles zusammen

## Aufgabe 4
# für alle in Aufgabe 3 gemachten Plots, L1/L2 Weight Penalty und 3 verschiedene Dropoutraten

## Aufgabe 5
# BN ohne REG mit SGD

## Aufgabe 6
# Adam, ohne BN ohne/mit REG

metrics_path = "trained_history/"
saved_models_path = "trained_models/"

params_b_64 = {'batch_size': 64, 
          'shuffle': True, 
          'pin_memory': True}

params_b_128 = {'batch_size': 128, 
          'shuffle': True, 
          'pin_memory': True}

params_b_256 = {'batch_size': 256, 
          'shuffle': True, 
          'pin_memory': True}

#add to this for pipeline
params = [params_b_64]

#add to this for pipeline
lrs = [0.01]

dropouts = [0.1, 0.25, 0.5]

val_params = {'batch_size': 512,
                'shuffle': True, 
              'pin_memory': True}

max_epochs = 200

In [9]:
train_set, val_set = torch.utils.data.random_split(trainset, [math.ceil(0.85*len(trainset)), math.floor(0.15*len(trainset))])

validationloader = DataLoader(val_set, **val_params)

for lr in lrs:
    for param in params:
        #for dropout in dropouts:
        #print(str(dropout))

        model = CNN(number_of_c_layers = 2, number_of_filters=[16,32], channel_numbers=[3,16], maxpool=[4,4], kernel_sizes=[3,3], strides=[1, 1], paddings=[0,0], fclayout=[120,32])#, dropout_rate = dropout)

        model.cuda()
        model.apply(weights_init)

        print(model)
        
        trainingloader = DataLoader(train_set, **param)
        run_name = "lr-" + str(lr) + "_batch_size-" + str(param['batch_size']) + "-kernel100"
        results = training(trainingloader, max_epochs, torch.optim.Adam(model.parameters(),lr=0.0001), model, validationloader, run_name=run_name)
        results.to_csv(metrics_path + run_name + ".csv", index=False)
        torch.save(model.state_dict(), saved_models_path + run_name)

26912
CNN(
  (net): Sequential(
    (conv2-0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
    (relu-conv-0): ReLU()
    (maxpool-0): MaxPool2d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (conv2-1): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
    (relu-conv-1): ReLU()
    (flatten1): Flatten(start_dim=1, end_dim=-1)
    (fc-0): Linear(in_features=26912, out_features=120, bias=True)
    (Drop-0): Dropout(p=0, inplace=False)
    (relu-fc-0): ReLU()
    (fc-1): Linear(in_features=120, out_features=32, bias=True)
    (Drop-1): Dropout(p=0, inplace=False)
    (relu-fc-1): ReLU()
    (fc-2): Linear(in_features=32, out_features=8, bias=True)
  )
)
Epoch: [1/200], total train (sum of) Loss: 713.037109375, Train Acc: 4946 / 22893
Epoch: [1/200], total validation (sum of) Loss: 1.978102684020996, Validation Acc: 976 / 4039


KeyboardInterrupt: 

In [None]:
checkpoint = torch.load(saved_models_path + "lr-0.1_batch_size-64-kernel-5")
print(checkpoint)
model.load_state_dict(checkpoint)
print(model)

model.cuda()

trainingloader = DataLoader(train_set, **params_b_64)
run_name = "lr-0.1_batch_size-64-kernel-5"
results = training(trainingloader, max_epochs, torch.optim.SGD(model.parameters(),lr=0.01), model, validationloader, run_name=run_name)
results.to_csv(metrics_path + run_name + ".csv", index=False)
torch.save(model.state_dict(), saved_models_path + run_name)