In [1]:
import torch
from torch import nn
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, IterableDataset
from torch.utils.tensorboard import SummaryWriter

import os
import json
import time
import numpy as np
from typing import List, Tuple

In [2]:
# Sanity check: display whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

In [3]:
# Device configuration: use the GPU when PyTorch can see one, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Data / model dimensions -------------------------------------------------
BATCH_SIZE = 1

INPUT_SIZE = 25 * 3          # 25 joints x 3 axes, flattened per frame
HIDDEN_SIZE = 1024 // 2      # 1024 halved for the bidirectional LSTM
EMBEDDING_INPUT_SIZE = 64    # passed to BiRNN as the embedding *output* size

# --- Optimisation hyper-parameters -------------------------------------------
LEARNING_RATE = 5e-4
L2_WEIGTH_DECAY = 1e-4
EPOCHS = 200

# Maps the action id found in a file header to a contiguous class index
# (0..42). The ids are not contiguous, hence the explicit list; the class
# index is simply the position of the id in that list.
LABELS = {
    action_id: class_index
    for class_index, action_id in enumerate(
        [*range(1, 12), 13, 15, 17, 19, 20, 22, 23, 25, *range(28, 52)]
    )
}

In [4]:
# Display the device selected in the configuration cell.
device

device(type='cuda')

In [None]:
# TODO
# https://pytorch.org/docs/stable/data.html#torch.utils.data.IterableDataset

In [102]:
class IterableMovementDataset(IterableDataset):
    """Iterable dataset that yields one complete movement sequence per file.

    Every file found in ``root`` is read into memory once, at construction
    time. Iterating yields ``(frames, target)`` pairs in sorted file-name
    order, where

    * ``frames`` is a float32 tensor of shape
      ``(sequence_length, NUMBER_OF_JOINTS, NUMBER_OF_AXES)`` (after the
      optional ``transforms`` callable has been applied), and
    * ``target`` is a float32 one-hot NumPy vector of length
      ``len(self.classes)``.

    File layout, as parsed here: two header lines followed by one line per
    frame. Joints within a frame are separated by ``"; "`` and coordinates
    within a joint by ``", "``. The last whitespace-separated token of the
    first header line is an underscore-separated key whose second field is the
    action id and whose last field is the number of frames.

    NOTE(review): ``__iter__`` does no per-worker sharding, so with a
    ``DataLoader`` using ``num_workers > 1`` every worker would yield the full
    file list.
    """

    NUMBER_OF_JOINTS = 25
    NUMBER_OF_AXES = 3

    def __init__(self, root: str, transforms=None):
        """Read every file under ``root`` into memory.

        Args:
            root: Directory containing one text file per sequence.
            transforms: Optional callable applied to each sequence tensor
                before it is yielded.
        """
        self.root = root
        self.transforms = transforms

        self.data_files = list(sorted(os.listdir(self.root)))
        # Never populated by this class; kept so the attribute set is unchanged.
        self.file_frames: List[int] = []

        self.classes = LABELS

        # file name -> list of text lines (trailing newlines stripped)
        self.loaded_data = dict()
        for file_name in self.data_files:
            with open(os.path.join(self.root, file_name), "r") as f:
                self.loaded_data[file_name] = f.read().rstrip('\n').split('\n')

    @staticmethod
    def _header_fields(file_data: List[str]) -> List[str]:
        """Return the underscore-separated fields of the header key."""
        return file_data[0].split()[-1].split("_")

    def _get_file_length(self, file_data: List[str]):
        """Return the number of frames declared in the file header."""
        return int(self._header_fields(file_data)[-1])

    def __iter__(self):
        """Yield ``(frames, one_hot_target)`` for every file, in sorted order."""
        expected_frame_shape = (self.NUMBER_OF_JOINTS, self.NUMBER_OF_AXES)

        for file_name in self.data_files:
            data_str = self.loaded_data[file_name]
            sequence_length = self._get_file_length(data_str)

            # Skip the two header lines; every remaining line is one frame.
            all_frames = np.array(
                [
                    [triple.split(", ") for triple in frame.split("; ")]
                    for frame in data_str[2:]
                ],
                dtype=np.float32,
            )
            assert all_frames.shape == (sequence_length, *expected_frame_shape), (
                f"{file_name}: parsed shape {all_frames.shape} does not match "
                f"the header ({sequence_length} frames)"
            )

            # One-hot target for the sequence label (second header field).
            label = self.classes[int(self._header_fields(data_str)[1])]
            target = np.zeros(len(self.classes), dtype=np.float32)
            target[label] = 1.0

            all_frames = torch.from_numpy(all_frames)

            # Honour the ``transforms`` argument instead of silently ignoring it.
            if self.transforms is not None:
                all_frames = self.transforms(all_frames)

            yield all_frames, target

    def __len__(self) -> int:
        """Number of sequences (one per file)."""
        return len(self.data_files)

In [97]:
class BiRNN(nn.Module):
    """Linear embedding -> stacked bidirectional LSTM -> per-class sigmoid scores.

    Args:
        input_size: Number of features per frame.
        lstm_hidden_size: Hidden size of each LSTM direction.
        embedding_output_size: Width of the frame embedding fed to the LSTM.
        num_classes: Number of output scores.
    """

    # Forward and backward hidden states are concatenated in the LSTM output.
    NUM_DIRECTIONS = 2

    def __init__(self, input_size: int, lstm_hidden_size: int, embedding_output_size: int, num_classes: int):
        super().__init__()
        self.hidden_size = lstm_hidden_size
        self.num_layers = 2  # number of stacked LSTM layers (each one is bidirectional)

        # Embedding part: input_size -> embedding_output_size
        self.embedding = nn.Linear(input_size, embedding_output_size)
        self.relu = nn.ReLU()

        self.lstm = nn.LSTM(embedding_output_size, lstm_hidden_size, self.num_layers, batch_first=True, bidirectional=True)
        self.do = nn.Dropout(0.5)
        # The LSTM output width depends on the number of directions, not on the
        # number of stacked layers (the two merely coincide when num_layers == 2).
        self.classifier = nn.Linear(self.hidden_size * self.NUM_DIRECTIONS, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Score a batch of sequences.

        Args:
            x: Tensor laid out as ``(batch, seq_len, input_size)``
               (the LSTM is built with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, num_classes)`` with values in ``[0, 1]``.
        """
        # Embedding
        out = self.relu(self.embedding(x))

        # Bi-LSTM. When no initial state is passed, nn.LSTM starts from zeros
        # on the input's own device/dtype, so no module-level ``device`` global
        # is needed here.
        out, _ = self.lstm(out)

        # Classify from the output at the last time step.
        # NOTE(review): for the reverse direction this is its state after
        # seeing only the final frame; left unchanged so that existing
        # checkpoints keep their meaning.
        out = self.do(out[:, -1, :])
        out = self.classifier(out)
        return self.sigmoid(out)

In [58]:
# Torch dataloader
class MovementsDataset(Dataset):
    """Map-style dataset that serves single frames across all sequence files.

    A global frame index is mapped to ``(file, line within file)`` using the
    frame counts read from each file header at construction time.
    ``__getitem__`` returns ``(frame, target)`` where ``frame`` is a float32
    array of shape ``(NUMBER_OF_JOINTS, NUMBER_OF_AXES)`` (after the optional
    ``transforms``) and ``target`` is the class index of the owning sequence.
    """

    NUMBER_OF_JOINTS = 25
    NUMBER_OF_AXES = 3

    def __init__(self, root, transforms=None):
        """Index the files under ``root`` and read each file's frame count.

        Args:
            root: Directory containing one text file per sequence.
            transforms: Optional callable applied to every frame array.
        """
        self.root = root
        self.transforms = transforms

        self.data_files = list(sorted(os.listdir(self.root)))
        self.file_frames: List[int] = []

        self.classes = LABELS

        # Load number of frames for every file
        for fn in self.data_files:
            with open(os.path.join(self.root, fn)) as f:
                header = f.readline().split()[-1].split("_")

                self.file_frames.append(int(header[-1]))  # last element - number_of_frames

    def _get_file_index(self, frame_indx) -> Tuple[int, int]:
        """Translate a global frame index into ``(file index, frame within file)``.

        Negative indices count from the end, as for Python sequences.

        Raises:
            IndexError: If ``frame_indx`` is outside ``[-len(self), len(self))``.
        """
        total_frames = len(self)
        remaining = frame_indx + total_frames if frame_indx < 0 else frame_indx
        if not 0 <= remaining < total_frames:
            raise IndexError(
                f"frame index {frame_indx} out of range for {total_frames} frames"
            )

        for i, nof in enumerate(self.file_frames):
            if remaining < nof:
                return i, remaining
            remaining -= nof

        # Unreachable given the range check above; kept as a safety net.
        raise IndexError(f"frame index {frame_indx} out of range")

    def __getitem__(self, indx):
        """Return ``(frame, class_index)`` for the global frame index ``indx``."""
        file_indx, line_indx = self._get_file_index(indx)
        action_file = os.path.join(self.root, self.data_files[file_indx])

        with open(action_file, "r") as f:
            data_str = f.read().rstrip('\n').split('\n')

        line_indx += 2  # first two header lines in the file
        # strip() drops the blank that follows each ";" separator.
        frame = np.array(
            [triple.strip().split(", ") for triple in data_str[line_indx].split(";")],
            dtype=np.float32,
        )
        assert frame.shape == (self.NUMBER_OF_JOINTS, self.NUMBER_OF_AXES)

        # Class index from the action id (second field of the header key).
        target = self.classes[int(data_str[0].split()[-1].split("_")[1])]

        if self.transforms:
            frame = self.transforms(frame)

        return frame, target

    def __len__(self) -> int:
        """Total number of frames over all files.

        Items are frames, not files, so the length must match the index space
        used by ``__getitem__``.
        """
        return sum(self.file_frames)

In [103]:
# Build the training and validation datasets; each constructor reads every
# file of its directory into memory.
train_dataset = IterableMovementDataset(root="../data/cross-view/train")
test_dataset = IterableMovementDataset(root="../data/cross-view/val")

In [110]:
# Both loaders take their batch size from the BATCH_SIZE configuration
# constant (currently 1) instead of a hard-coded value / the implicit default.
# NOTE(review): sequences presumably differ in length, in which case the
# default collate function cannot stack more than one per batch - confirm
# before raising BATCH_SIZE.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [31]:
# Display the device in use (same check as earlier in the notebook).
device

device(type='cuda')

In [None]:
# set-up
#  - Adam optimizer *
#  - LR = 0.0005 *
#  - batch = 1 *
#  - L2 weight decay = 0.0001 *
#  - dropout = 0.5 *
#  - 200 epochs
#  - Embedding - 64 *
#  - Hidden-state - 1024 --> halved for Bi-LSTM *

In [114]:
def save_model(model, model_name: str, epoch: int):
    """Persist the model weights and record an epoch number next to them.

    Args:
        model: Module whose ``state_dict()`` is written to ``<model_name>.pth``.
        model_name: File name (without extension) for the weights.
        epoch: Value written, as text, to the file ``last_checkpoint`` in the
            current working directory. That file is shared by every call,
            whatever ``model_name`` is, and is overwritten each time.
    """
    weights = model.state_dict()
    torch.save(weights, f"{model_name}.pth")

    epoch_text = str(epoch)
    with open("last_checkpoint", "w") as marker_file:
        marker_file.write(epoch_text)

    print(f"Model saved into: {model_name}.pth")

In [117]:
# Training the network
model = BiRNN(INPUT_SIZE, HIDDEN_SIZE, EMBEDDING_INPUT_SIZE, len(train_dataset.classes)).to(device)

# Resume from "model.pth" when it exists. The "last_checkpoint" file holds the
# index of the epoch to continue with.
start_epoch = 0
if os.path.exists("model.pth"):
    # map_location lets weights saved on a GPU be restored on a CPU-only machine.
    model.load_state_dict(torch.load('model.pth', map_location=device))
    # Weights without an epoch marker are still usable: restart counting at 0.
    if os.path.exists("last_checkpoint"):
        with open("last_checkpoint", "r") as lf:
            start_epoch = int(lf.read())
    print(f"Model loaded with epoch: {start_epoch}")
else:
    print("Pretrained model not found")

SAVE_CHECHPOINT = 50
PRINT_STEP = 999
board_writer = SummaryWriter()

criterion = nn.BCELoss()  # targets are one-hot float vectors, outputs are sigmoid scores
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=L2_WEIGTH_DECAY
)

# Train the model
model.train()  # make sure dropout is active, whatever mode the model was left in
total_iterations_per_epoch = len(train_loader)

# Each run of this cell trains 90 further epochs (not the full EPOCHS budget).
for epoch in range(start_epoch, start_epoch + 90):
    s_time = time.time()
    total_loss = 0.0

    for i, (sequence, labels) in enumerate(train_loader, 1):
        # Flatten joints x axes -> (batch, seq_len, num_of_features)
        sequence = sequence.view(sequence.size(0), sequence.size(1), -1).to(device)
        labels = labels.to(device)

        # forward pass
        outputs = model(sequence)
        loss = criterion(outputs, labels)
        total_loss += loss.item()

        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % PRINT_STEP == 0:
            average_loss = total_loss / PRINT_STEP
            print(f"{i}/{total_iterations_per_epoch} -  Epoch [{epoch + 1}/{EPOCHS}], average_loss: {round(average_loss, 6)}")
            total_loss = 0.0

            board_writer.add_scalar(
                'Average_Loss/train',
                average_loss,
                (epoch * total_iterations_per_epoch) + i
            )

    print(f"Evaluation time: {time.time() - s_time}s.")

    if (epoch + 1) % SAVE_CHECHPOINT == 0:
        # Keep a numbered snapshot AND refresh "model.pth": save_model always
        # overwrites the shared "last_checkpoint" marker, so the resumable
        # weights must be updated together with it or a later resume would
        # pair stale weights with a newer epoch number.
        save_model(model, f"model_{epoch}", epoch + 1)
        save_model(model, "model", epoch + 1)

# Store the NEXT epoch to run so that resuming does not repeat the last
# finished epoch (and does not reuse its TensorBoard step range).
save_model(model, "model", epoch + 1)
board_writer.flush()
print("DONE")

Model loaded with epoch: 0
999/13207 -  Epoch [1/200], average_loss: 0.113635
1998/13207 -  Epoch [1/200], average_loss: 0.113361
2997/13207 -  Epoch [1/200], average_loss: 0.113285
3996/13207 -  Epoch [1/200], average_loss: 0.113283
4995/13207 -  Epoch [1/200], average_loss: 0.112483
5994/13207 -  Epoch [1/200], average_loss: 0.112738
6993/13207 -  Epoch [1/200], average_loss: 0.112629
7992/13207 -  Epoch [1/200], average_loss: 0.117491
8991/13207 -  Epoch [1/200], average_loss: 0.11219
9990/13207 -  Epoch [1/200], average_loss: 0.111831
10989/13207 -  Epoch [1/200], average_loss: 0.107128
11988/13207 -  Epoch [1/200], average_loss: 0.100046
12987/13207 -  Epoch [1/200], average_loss: 0.098777
Evaluation time: 459.68922567367554s.
999/13207 -  Epoch [2/200], average_loss: 0.103275
1998/13207 -  Epoch [2/200], average_loss: 0.099607
2997/13207 -  Epoch [2/200], average_loss: 0.097289
3996/13207 -  Epoch [2/200], average_loss: 0.095844
4995/13207 -  Epoch [2/200], average_loss: 0.094424

In [21]:
# Manual save: writes the current weights to "model.pth" and the current value
# of `epoch` to "last_checkpoint" (presumably run after interrupting training).
save_model(model, "model", epoch)

In [136]:
def save_results(dict_results, total_records):
    """Write the evaluation counts to ``model_results.json``.

    Args:
        dict_results: Per-threshold results, stored under the key
            ``"thresholds"``. Must be JSON-serialisable.
        total_records: Number of evaluated records, stored under the key
            ``"total_records"``.

    The file is created in the current working directory and overwritten if
    it already exists.
    """
    payload = {
        "thresholds": dict_results,
        "total_records": total_records,
    }
    with open("model_results.json", "w") as jf:
        json.dump(payload, jf)

In [145]:
# Test model
# Switch to inference mode so that dropout is disabled; otherwise the scores
# (and every count derived from them) are randomly perturbed.
model.eval()

steps = 20
def_dict = {
    "correct": 0,   # true class scored above the threshold
    "above": 0,     # all class scores above the threshold
}
# Evenly spaced thresholds 1/steps, 2/steps, ..., 1.0, each with its own counters.
thresholds = [(round((1 / steps) * (i + 1), 4), def_dict.copy()) for i in range(steps)]
total = 0

with torch.no_grad():
    for i, (sequence, labels) in enumerate(test_loader):
        # (batch, seq_len, joints, axes) -> (batch, seq_len, features)
        sequence = sequence.view(sequence.size(0), sequence.size(1), -1).to(device)

        outputs = model(sequence)  # (batch, num_classes)

        # Score the model gave to the true class of every sample in the batch.
        label_ids = labels.argmax(dim=1).to(outputs.device)
        true_class_scores = outputs.gather(1, label_ids.unsqueeze(1))

        for val_th, res_dict in thresholds:
            res_dict["above"] += (outputs > val_th).sum().item()
            # Correctly predicted
            res_dict["correct"] += (true_class_scores > val_th).sum().item()

        total += labels.size(0)

        if i % 49 == 0:
            print(f"Processed: [{i}/{len(test_dataset)}]")

# Save dictionary results
save_results(thresholds, total)

Processed: [0/837]
Processed: [49/837]
Processed: [98/837]
Processed: [147/837]
Processed: [196/837]
Processed: [245/837]
Processed: [294/837]
Processed: [343/837]
Processed: [392/837]
Processed: [441/837]
Processed: [490/837]
Processed: [539/837]
Processed: [588/837]
Processed: [637/837]
Processed: [686/837]
Processed: [735/837]
Processed: [784/837]
Processed: [833/837]


In [137]:
# Re-write "model_results.json" from the in-memory `thresholds` and `total`.
save_results(thresholds, total)

In [51]:
# Load results
with open("model_cv/model_results.json", "r") as jf:
    data = json.load(jf)

thresholds = data["thresholds"]
total = data["total_records"]

In [148]:
# Calculation of evaluation metrics for every threshold
#  - recall    = correct / total records
#  - precision = correct / all scores above the threshold
#  - AP        = sum over thresholds of |recall change| * precision
ap_score = 0
old_recall = 0
# The last threshold is skipped.
# NOTE(review): presumably because it is 1.0, which a sigmoid score can never
# exceed - confirm.
for th, values in thresholds[:-1]:
    # Guard against an empty evaluation run.
    recall = values["correct"] / total if total else 0.0

    if values["above"] > 0:
        precision = values["correct"] / values["above"]
    else:
        precision = 0

    # F1 is undefined when precision and recall are both zero (e.g. scores
    # above the threshold but none for the true class); report 0 instead of
    # raising ZeroDivisionError.
    if precision + recall > 0:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        f1_score = 0

    print("\n" + "-" * 15 + f"{th}" + "-" * 15)
    print(f"Test Precision: {round(100 * precision, 4)}%")
    print(f"Test Recall: {round(100 * recall, 4)}%")
    print(f"Test f1_score: {round(f1_score, 4)}")

    ap_score += ((abs(recall - old_recall)) * precision)
    old_recall = recall

# AP - score
print("-" * 32)
print("-" * 14 + " AP " + "-" * 14)
print(f"AP: {round(ap_score, 4)}")


---------------0.05---------------
Test Precision: 21.0055%
Test Recall: 86.8578%
Test f1_score: 0.3383

---------------0.1---------------
Test Precision: 29.4722%
Test Recall: 75.3883%
Test f1_score: 0.4238

---------------0.15---------------
Test Precision: 35.036%
Test Recall: 63.9188%
Test f1_score: 0.4526

---------------0.2---------------
Test Precision: 39.3707%
Test Recall: 55.3166%
Test f1_score: 0.46

---------------0.25---------------
Test Precision: 44.8619%
Test Recall: 48.5066%
Test f1_score: 0.4661

---------------0.3---------------
Test Precision: 49.0541%
Test Recall: 43.3692%
Test f1_score: 0.4604

---------------0.35---------------
Test Precision: 52.3179%
Test Recall: 37.7539%
Test f1_score: 0.4386

---------------0.4---------------
Test Precision: 55.3753%
Test Recall: 32.6165%
Test f1_score: 0.4105

---------------0.45---------------
Test Precision: 60.7046%
Test Recall: 26.7622%
Test f1_score: 0.3715

---------------0.5---------------
Test Precision: 64.8649%
Te

In [None]:
# TODO
#  - evaluation metrics
#  - data format for training

In [None]:
####### Evaluation metrics

In [None]:
# Calculate precision
#  - Precision: the ratio of correctly annotated frames to all model-annotated frames on the test sequences
#  - correctly annotated frames (numerator)
#  - apply the threshold to every output in every frame --> the number of classes the frame was classified into (all model-annotated frames)