In [1]:
import torch
from torch import nn
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, IterableDataset

import os
import time
import numpy as np
from typing import List, Tuple

In [2]:
torch.cuda.is_available()

True

In [3]:
# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

BATCH_SIZE = 1

INPUT_SIZE = 1
HIDDEN_SIZE = 1024 // 2
LINEAR_INPUT_SIZE = 64

LEARNING_RATE = 0.003 #0.0005
L2_WEIGTH_DECAY = 0.0001
EPOCHS = 10

LABELS = {
    1: 0,
    2: 1,
    3: 2,
    4: 3,
    5: 4,
    6: 5,
    7: 6,
    8: 7,
    9: 8,
    10: 9,
    11: 10,
    13: 11,
    15: 12,
    17: 13,
    19: 14,
    20: 15,
    22: 16,
    23: 17,
    25: 18,
    28: 19,
    29: 20,
    30: 21,
    31: 22,
    32: 23,
    33: 24,
    34: 25,
    35: 26,
    36: 27,
    37: 28,
    38: 29,
    39: 30,
    40: 31,
    41: 32,
    42: 33,
    43: 34,
    44: 35,
    45: 36,
    46: 37,
    47: 38,
    48: 39,
    49: 40,
    50: 41,
    51: 42
}

In [4]:
device

device(type='cuda')

In [None]:
# TODO
# https://pytorch.org/docs/stable/data.html#torch.utils.data.IterableDataset

In [5]:
class IterableMovementDataset(IterableDataset):
    
    NUMBER_OF_JOINTS = 25
    NUMBER_OF_AXES = 3
    
    def __init__(self, root: str, transforms=None):
        self.root = root
        self.transforms = transforms
        
        self.data_files = list(sorted(os.listdir(self.root)))
        self.file_frames: List[int] = []
        
        self.classes = LABELS
        
        self.loaded_data = dict()
        for file_name in self.data_files:
            with open(os.path.join(self.root, file_name), "r") as f:
                self.loaded_data[file_name] = f.read().rstrip('\n').split('\n')

    def _get_file_length(self, file_data: List[str]):
        header = file_data[0].split()[-1].split("_")
        return int(header[-1])

    def __iter__(self):
        for i, file_name in enumerate(self.data_files):
            #action_file = os.path.join(self.root, file_name)
            #with open(action_file, "r") as f:
            #    data_str = f.read().rstrip('\n').split('\n')
            data_str = self.loaded_data[file_name]
            
            sequence_length = self._get_file_length(data_str)
            
            all_frames = []
            for frame in data_str[2:]:  # first two header lines in the file
                all_frames.append(
                    [triple.split(", ") for triple in frame.split("; ")]
                )
            
            all_frames = np.array(all_frames, dtype=np.float32)
            '''
            frame = np.array(
                [
                    triple.split(", ") for triple in data_str[line_indx].split(";")
                ],
                dtype=np.float32
            )
            '''
            assert all_frames.shape == (sequence_length, self.NUMBER_OF_JOINTS, self.NUMBER_OF_AXES)
    
            # get sequence label
            target = [self.classes[int(data_str[0].split()[-1].split("_")[1])]] * sequence_length
            target = np.array(target)
        
            if self.transforms:
                all_frames = self.transforms(all_frames)

            yield all_frames, target
    
    def __len__(self) -> int:
        return len(self.data_files)

In [6]:
class BiRNN(nn.Module):

    def __init__(self, input_size: int, hidden_size: int, linear_input_size: int, num_classes: int):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = 2

        self.lstm = nn.LSTM(input_size, hidden_size, self.num_layers, batch_first=True, bidirectional=True)
        self.embedding = nn.Linear(self.hidden_size * 2, linear_input_size)
        self.relu = nn.ReLU()
        self.do = nn.Dropout(0.5)
        self.classifier = nn.Linear(linear_input_size, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)

        # Forward to LSTM
        out, _ = self.lstm(x, (h0, c0))  # output format: (batch_size, seq_length, hidden_size * 2)
        out = self.embedding(out[:, -1, :])
        out = self.relu(out)
        out = self.do(out)
        out = self.classifier(out)
        out = self.sigmoid(out)
        return out

In [58]:
# Torch dataloader
class MovementsDataset(Dataset):
    
    NUMBER_OF_JOINTS = 25
    NUMBER_OF_AXES = 3
    
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        
        self.data_files = list(sorted(os.listdir(self.root)))
        self.file_frames: List[int] = []

        self.classes = LABELS
        
        # Load number of frames for every file
        for fn in self.data_files:
            with open(os.path.join(self.root, fn)) as f:
                header = f.readline().split()[-1].split("_")
                
                self.file_frames.append(int(header[-1]))  # last element - number_of_frames
        
    def _get_file_index(self, frame_indx) -> Tuple[int, int]:
        start_indx = frame_indx
        for i, nof in enumerate(self.file_frames):
            if start_indx < nof:
                # print(f"{start_indx} - {i}")
                return i, start_indx
            else:
                start_indx -= nof
        
    def __getitem__(self, indx):
        file_indx, line_indx = self._get_file_index(indx)
        action_file = os.path.join(self.root, self.data_files[file_indx])
        
        with open(action_file, "r") as f:
            data_str = f.read().rstrip('\n').split('\n')
        
        line_indx += 2  # first two header lines in the file   
        frame = np.array([triple.split(", ") for triple in data_str[line_indx].split(";")], dtype=np.float32)
        assert frame.shape == (self.NUMBER_OF_JOINTS, self.NUMBER_OF_AXES)
        
        target = self.classes[int(data_str[0].split()[-1].split("_")[1])]
        
        if self.transforms:
            frame = self.transforms(frame)
        
        return frame, target
    
    def __len__(self) -> int:
        return len(self.data_files)

In [7]:
train_dataset = IterableMovementDataset(
    "../data/cross-subject/train",
    transforms=transforms.ToTensor()
)
test_dataset = IterableMovementDataset(
    "../data/cross-subject/val",
    transforms=transforms.ToTensor()
)

In [8]:
train_loader = DataLoader(train_dataset) #, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset) #, batch_size=BATCH_SIZE, shuffle=False)

In [9]:
device

device(type='cuda')

In [None]:
# set-up
#  - Adam optimizer *
#  - LR = 0.0005 *
#  - batch = 1 *
#  - L2 weight decay = 0.0001 *
#  - dropout = 0.5 *
#  - 200 epochs
#  - Embedding - 64 *
#  - Hidden-state - 1024 --> halved for Bi-LSTM *

In [79]:
# Training the network
model = BiRNN(75, HIDDEN_SIZE, LINEAR_INPUT_SIZE, len(train_dataset.classes)).to(device)

if os.path.exists("model.pth"):
    model.load_state_dict(torch.load('model.pth'))
    print("Model loaded")
else:
    print("Pretrained model not found")

PRINT_STEP = 999
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=L2_WEIGTH_DECAY
)

# Train the model
for epoch in range(20):
    s_time = time.time()
    total_loss = 0.0
    
    for i, (sequence, labels) in enumerate(train_loader, 1):
        #print(sequence.shape)
        #print(labels.shape)
        
        sequence = sequence.reshape(-1, 1, 25 * 3).to(device)
        labels = labels.reshape(-1).to(device)
        
        #print(sequence.shape)
        #print(labels.shape)

        # forward pass
        outputs = model(sequence)
        
        #print(outputs.shape)
        #print(labels.shape)

        loss = criterion(outputs, labels)
        total_loss += loss.item()

        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % PRINT_STEP == 0:
            print(f"{i}/{len(train_loader)} -  Epoch [{epoch + 1}/{EPOCHS}], average_loss: {round(total_loss / PRINT_STEP, 4)}")
            total_loss = 0.0
    
    print(f"Evaluation time: {time.time() - s_time}s.")
    #break
    
print("DONE")

Model loaded
999/17311 -  Epoch [1/10], average_loss: 3.4436
1998/17311 -  Epoch [1/10], average_loss: 3.4168
2997/17311 -  Epoch [1/10], average_loss: 3.4205
3996/17311 -  Epoch [1/10], average_loss: 3.3931
4995/17311 -  Epoch [1/10], average_loss: 3.4031
5994/17311 -  Epoch [1/10], average_loss: 3.4089
6993/17311 -  Epoch [1/10], average_loss: 3.3753
7992/17311 -  Epoch [1/10], average_loss: 3.4082
8991/17311 -  Epoch [1/10], average_loss: 3.3676
9990/17311 -  Epoch [1/10], average_loss: 3.3478
10989/17311 -  Epoch [1/10], average_loss: 3.3945
11988/17311 -  Epoch [1/10], average_loss: 3.3641
12987/17311 -  Epoch [1/10], average_loss: 3.42
13986/17311 -  Epoch [1/10], average_loss: 3.3764
14985/17311 -  Epoch [1/10], average_loss: 3.4207
15984/17311 -  Epoch [1/10], average_loss: 3.3365
16983/17311 -  Epoch [1/10], average_loss: 3.3594
Evaluation time: 152.66332149505615s.
999/17311 -  Epoch [2/10], average_loss: 3.4192
1998/17311 -  Epoch [2/10], average_loss: 3.4063
2997/17311 -  E

7992/17311 -  Epoch [10/10], average_loss: 3.3726
8991/17311 -  Epoch [10/10], average_loss: 3.3331
9990/17311 -  Epoch [10/10], average_loss: 3.3293
10989/17311 -  Epoch [10/10], average_loss: 3.3664
11988/17311 -  Epoch [10/10], average_loss: 3.3328
12987/17311 -  Epoch [10/10], average_loss: 3.3902
13986/17311 -  Epoch [10/10], average_loss: 3.3463
14985/17311 -  Epoch [10/10], average_loss: 3.4017
15984/17311 -  Epoch [10/10], average_loss: 3.3142
16983/17311 -  Epoch [10/10], average_loss: 3.3465
Evaluation time: 154.52292799949646s.
999/17311 -  Epoch [11/10], average_loss: 3.3996
1998/17311 -  Epoch [11/10], average_loss: 3.3832
2997/17311 -  Epoch [11/10], average_loss: 3.373
3996/17311 -  Epoch [11/10], average_loss: 3.3598
4995/17311 -  Epoch [11/10], average_loss: 3.359
5994/17311 -  Epoch [11/10], average_loss: 3.3726
6993/17311 -  Epoch [11/10], average_loss: 3.3431
7992/17311 -  Epoch [11/10], average_loss: 3.3764
8991/17311 -  Epoch [11/10], average_loss: 3.334
9990/1731

11988/17311 -  Epoch [19/10], average_loss: 3.3324
12987/17311 -  Epoch [19/10], average_loss: 3.3879
13986/17311 -  Epoch [19/10], average_loss: 3.3443
14985/17311 -  Epoch [19/10], average_loss: 3.3972
15984/17311 -  Epoch [19/10], average_loss: 3.3109
16983/17311 -  Epoch [19/10], average_loss: 3.3433
Evaluation time: 152.6308238506317s.
999/17311 -  Epoch [20/10], average_loss: 3.3959
1998/17311 -  Epoch [20/10], average_loss: 3.3785
2997/17311 -  Epoch [20/10], average_loss: 3.3836
3996/17311 -  Epoch [20/10], average_loss: 3.3636
4995/17311 -  Epoch [20/10], average_loss: 3.3691
5994/17311 -  Epoch [20/10], average_loss: 3.3757
6993/17311 -  Epoch [20/10], average_loss: 3.3388
7992/17311 -  Epoch [20/10], average_loss: 3.3743
8991/17311 -  Epoch [20/10], average_loss: 3.3388
9990/17311 -  Epoch [20/10], average_loss: 3.3269
10989/17311 -  Epoch [20/10], average_loss: 3.3587
11988/17311 -  Epoch [20/10], average_loss: 3.3256
12987/17311 -  Epoch [20/10], average_loss: 3.3881
13986

In [80]:
torch.save(model.state_dict(), 'model.pth')

In [82]:
# Test model
with torch.no_grad():
    steps = 10
    def_dict = {
        "correct": 0,
        "above": 0,
    }
    thresholds = [(round((1 / steps) * (i + 1), 4), def_dict.copy()) for i in range(steps)]
    total = 0

    for i, (sequence, labels) in enumerate(test_loader):
        sequence = sequence.reshape(-1, 1, 25 * 3).to(device)
        labels = labels.reshape(-1).to(device)
        
        outputs = model(sequence)

        for val_th, res_dict in thresholds:
            res_dict["above"] += (outputs.data > val_th).sum().item()
            
            # Correctly predicted
            for record_i, label_i in zip(*torch.where(outputs.data > val_th)):
                if label_i == labels[record_i]:
                    res_dict["correct"] += 1
        
        total += labels.size(0)

        if i % 49 == 0:
            print(f"Processed: [{i}/{len(test_dataset)}]")

    ## Calculate result measurements for every threshold
    for th, values in thresholds:
        recall = values["correct"] / total
    
        if values["above"] > 0:
            precision = values["correct"] / values["above"]
            f1_score = 2 * (precision * recall) / (precision + recall)
        else:
            precision = 0
            f1_score = 0

        print("\n" + "-" * 15 + f"{th}" + "-" * 15)
        print(f"Test Precision: {round(100 * precision, 4)}%")
        print(f"Test Recall: {round(100 * recall, 4)}%")
        print(f"Test f1_score: {round(f1_score, 4)}")

# Save model
#torch.save(model.state_dict(), 'model.pth')

Processed: [0/2509]
Processed: [49/2509]
Processed: [98/2509]
Processed: [147/2509]
Processed: [196/2509]
Processed: [245/2509]
Processed: [294/2509]
Processed: [343/2509]
Processed: [392/2509]
Processed: [441/2509]
Processed: [490/2509]
Processed: [539/2509]
Processed: [588/2509]
Processed: [637/2509]
Processed: [686/2509]
Processed: [735/2509]
Processed: [784/2509]
Processed: [833/2509]
Processed: [882/2509]
Processed: [931/2509]
Processed: [980/2509]
Processed: [1029/2509]
Processed: [1078/2509]
Processed: [1127/2509]
Processed: [1176/2509]
Processed: [1225/2509]
Processed: [1274/2509]
Processed: [1323/2509]
Processed: [1372/2509]
Processed: [1421/2509]
Processed: [1470/2509]
Processed: [1519/2509]
Processed: [1568/2509]
Processed: [1617/2509]
Processed: [1666/2509]
Processed: [1715/2509]
Processed: [1764/2509]
Processed: [1813/2509]
Processed: [1862/2509]
Processed: [1911/2509]
Processed: [1960/2509]
Processed: [2009/2509]
Processed: [2058/2509]
Processed: [2107/2509]
Processed: [2

In [None]:
####### Evalution metrics

In [None]:
# AP-score

In [None]:
# Calculate precision
#  - Precision: the ratio of correctly annotated frames and all the model-annotated frames on test sequences
#  - spravne anotovane (correctly annotates)
#  - zoberiem threshold pre kazdy output v kazdom snimku --> pocet tried do ktorych sa klasifikoval snimok (all model-annotated frames)

In [None]:
# Calculate F-measure

In [None]:
# Calculate PKU-MMD measure
# - evaluacia prebieha na celom datasette, THRESHOLD-0 je hranica kedy rozhodujem
# - mam multi-class clasifikaciu, kedze beriem cely dataset, pozitivne/negativne do confusion-matrix
#   a z toho vypocutam Recall & Precision

In [None]:
# Otazky
#  - co je threshold - 0 ??
#  - kedze klasifikujem tak beriem vysledok s najvacsou hodnotou, neklasifikujem na positivne/negativne ale
#    na presnu kategoriu, takze ako mam zakomponovat threshold do toho?
#  - Ako mam vyuzit Average Precision Protocol, chapem ako funguje IOU pri detekcii objektov, co toto je podobny
#    princip, akurat si niesom isty ci moj vypocet je spravny kedze I u I* je vzdy v podsatate dlzka sekvencie
#    nikdy to nebude nejake vacsie cislo (oproti IOU kde boxy mozu vytvorit roznu hodnotu zjednotenia), pretoze
#    v tom pripade je ten vypocet v podsatate Accuracy, Pocet spravnych klasifokovanych framov / celkovy pocet?
#  - taktiez k comu mam potom vyuzit ten threshold 0 pri Precision a Recall ?? Kedze vypocet je z confusion
#    matrix, kde su kategorie jasne zadefinovane, kedze beriem vzdy maximum pre vsetky kategorie