In [1]:
import torch
from torch import nn
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, IterableDataset
from torch.utils.tensorboard import SummaryWriter

import os
import json
import time
import numpy as np
from typing import List, Tuple

In [2]:
# Sanity check: report whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

In [111]:
# Device configuration: use the GPU when PyTorch can see one, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Batch size from the experiment set-up.
BATCH_SIZE = 1

# Network dimensions.
INPUT_SIZE = 25 * 3        # 25 joints x 3 axes per frame
HIDDEN_SIZE = 1024 // 2    # 1024 halved for the Bi-LSTM
EMBEDDING_INPUT_SIZE = 64

# Optimiser hyper-parameters.
LEARNING_RATE = 0.0005
L2_WEIGTH_DECAY = 0.0001
EPOCHS = 200

# Action ids that occur in the data, in ascending order. The position of an id
# in this tuple is the contiguous class index used by the network.
_ACTION_IDS = (
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
    13, 15, 17, 19, 20, 22, 23, 25,
    28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
)

# Maps action id -> class index (0..42).
LABELS = {action_id: class_index for class_index, action_id in enumerate(_ACTION_IDS)}

In [4]:
# Show which device was selected by the configuration cell.
device

device(type='cuda')

In [None]:
# TODO
# https://pytorch.org/docs/stable/data.html#torch.utils.data.IterableDataset

In [166]:
class IterableMovementDataset(IterableDataset):
    """Streams whole skeleton sequences, one data file per sample.

    All files under ``root`` are read into memory as text lines when the
    dataset is constructed. Iterating parses one file at a time and yields
    ``(frames, target)``:

    * ``frames`` - ``float32`` array of shape
      ``(sequence_length, NUMBER_OF_JOINTS, NUMBER_OF_AXES)``, passed through
      ``transforms`` when one is given;
    * ``target`` - ``float64`` one-hot vector of length ``len(self.classes)``.

    File layout, as read by the code: the last whitespace-separated token of
    the first line is an ``_``-separated key whose second field is the action
    id and whose last field is the number of frames. The second line is
    skipped. Every following line is one frame written as
    ``"x, y, z; x, y, z; ..."``.
    """

    NUMBER_OF_JOINTS = 25
    NUMBER_OF_AXES = 3
    HEADER_LINES = 2  # lines that precede the frame data in every file

    def __init__(self, root: str, transforms=None):
        """
        Args:
            root: directory that contains the sequence files.
            transforms: optional callable applied to each parsed frame array.
        """
        self.root = root
        self.transforms = transforms

        # Keep regular files only: os.listdir also returns sub-directories
        # (for example Jupyter's .ipynb_checkpoints), which cannot be opened.
        self.data_files = sorted(
            name for name in os.listdir(self.root)
            if os.path.isfile(os.path.join(self.root, name))
        )
        # Not used by this class; kept so the attribute set stays unchanged.
        self.file_frames: List[int] = []

        self.classes = LABELS

        # file name -> list of text lines, loaded once up front.
        self.loaded_data = dict()
        for file_name in self.data_files:
            with open(os.path.join(self.root, file_name), "r") as f:
                self.loaded_data[file_name] = f.read().rstrip('\n').split('\n')

    @staticmethod
    def _header_fields(file_data: List[str]) -> List[str]:
        """Split the key at the end of the first header line on ``_``."""
        return file_data[0].split()[-1].split("_")

    def _get_file_length(self, file_data: List[str]):
        """Return the frame count announced in the header (last key field)."""
        return int(self._header_fields(file_data)[-1])

    def __iter__(self):
        # When the DataLoader runs several workers, every worker receives its
        # own copy of this dataset. Without sharding each copy would yield all
        # files and every sample would be seen num_workers times per epoch.
        worker_info = torch.utils.data.get_worker_info()
        if worker_info is None:
            file_names = self.data_files
        else:
            file_names = self.data_files[worker_info.id::worker_info.num_workers]

        for file_name in file_names:
            data_str = self.loaded_data[file_name]
            sequence_length = self._get_file_length(data_str)

            all_frames = np.array(
                [
                    [triple.split(", ") for triple in frame.split("; ")]
                    for frame in data_str[self.HEADER_LINES:]
                ],
                dtype=np.float32,
            )
            expected_shape = (sequence_length, self.NUMBER_OF_JOINTS, self.NUMBER_OF_AXES)
            assert all_frames.shape == expected_shape, (
                f"{file_name}: expected shape {expected_shape}, got {all_frames.shape}"
            )

            # One-hot sequence label; the action id is the second key field.
            label = self.classes[int(self._header_fields(data_str)[1])]
            target = np.zeros(len(self.classes), dtype=np.float64)
            target[label] = 1.0

            if self.transforms:
                all_frames = self.transforms(all_frames)

            yield all_frames, target

    def __len__(self) -> int:
        """Number of sequence files (one sample per file)."""
        return len(self.data_files)

In [6]:
class BiRNN(nn.Module):
    """Two-layer bidirectional LSTM followed by an embedding and a sigmoid classifier.

    Input:  ``(batch, seq_length, input_size)`` (the LSTM is ``batch_first``).
    Output: ``(batch, num_classes)`` with independent per-class scores in [0, 1].
    """

    def __init__(self, input_size: int, hidden_size: int, linear_input_size: int, num_classes: int):
        """
        Args:
            input_size: number of features per time step.
            hidden_size: LSTM hidden width per direction.
            linear_input_size: width of the embedding fed to the classifier.
            num_classes: number of output scores.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = 2

        self.lstm = nn.LSTM(input_size, hidden_size, self.num_layers, batch_first=True, bidirectional=True)
        # A bidirectional LSTM emits forward and backward states side by side,
        # hence the factor 2 on the embedding input.
        self.embedding = nn.Linear(self.hidden_size * 2, linear_input_size)
        self.relu = nn.ReLU()
        self.do = nn.Dropout(0.5)
        self.classifier = nn.Linear(linear_input_size, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # nn.LSTM starts from zero hidden/cell states when none are passed, and
        # creates them on the input's device. This removes the dependency on the
        # notebook-level `device` variable that the explicit zeros had.
        out, _ = self.lstm(x)  # output format: (batch_size, seq_length, hidden_size * 2)

        # NOTE(review): only the last time step is used; for the backward
        # direction that step has seen a single input. Confirm this is intended.
        out = self.embedding(out[:, -1, :])
        out = self.relu(out)
        out = self.do(out)
        out = self.classifier(out)
        return self.sigmoid(out)

In [114]:
class NewBiRNN(nn.Module):
    """Linear embedding + ReLU, then a 2-layer bidirectional LSTM and a sigmoid classifier.

    Input:  ``(batch, seq_length, input_size)`` (the LSTM is ``batch_first``).
    Output: ``(batch, num_classes)`` with independent per-class scores in [0, 1].
    """

    NUM_DIRECTIONS = 2  # forward + backward pass of the bidirectional LSTM

    def __init__(self, input_size: int, lstm_hidden_size: int, embedding_output_size: int, num_classes: int):
        """
        Args:
            input_size: number of features per time step.
            lstm_hidden_size: LSTM hidden width per direction.
            embedding_output_size: width of the per-step embedding fed to the LSTM.
            num_classes: number of output scores.
        """
        super().__init__()
        self.hidden_size = lstm_hidden_size
        self.num_layers = 2  # stacked LSTM layers

        # Embedding part: input_size -> embedding_output_size for every time step.
        self.embedding = nn.Linear(input_size, embedding_output_size)
        self.relu = nn.ReLU()

        self.lstm = nn.LSTM(
            embedding_output_size,
            lstm_hidden_size,
            self.num_layers,
            batch_first=True,
            bidirectional=True,
        )
        self.do = nn.Dropout(0.5)
        # The LSTM output width is hidden_size * number of directions and does
        # not depend on the number of layers. The previous `* self.num_layers`
        # was only right because both happen to be 2.
        self.classifier = nn.Linear(self.hidden_size * self.NUM_DIRECTIONS, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Embedding
        out = self.relu(self.embedding(x))

        # Bi-LSTM. Zero initial states are nn.LSTM's default and are created on
        # the input's device, so the notebook-level `device` is not needed here.
        out, _ = self.lstm(out)

        # Classify from the last time step only.
        out = self.do(out[:, -1, :])
        out = self.classifier(out)
        return self.sigmoid(out)

In [58]:
# Torch dataloader
class MovementsDataset(Dataset):
    """Map-style dataset that serves single frames.

    A sample index is a global frame index counted across all files in sorted
    file-name order. ``__getitem__`` returns ``(frame, target)`` where
    ``frame`` is a ``float32`` array of shape
    ``(NUMBER_OF_JOINTS, NUMBER_OF_AXES)`` (after ``transforms`` when given)
    and ``target`` is the class index of the file the frame belongs to.
    """

    NUMBER_OF_JOINTS = 25
    NUMBER_OF_AXES = 3
    HEADER_LINES = 2  # lines that precede the frame data in every file

    def __init__(self, root, transforms=None):
        """
        Args:
            root: directory that contains the sequence files.
            transforms: optional callable applied to each parsed frame.
        """
        self.root = root
        self.transforms = transforms

        # Keep regular files only: os.listdir also returns sub-directories
        # (for example Jupyter's .ipynb_checkpoints), which cannot be opened.
        self.data_files = sorted(
            name for name in os.listdir(self.root)
            if os.path.isfile(os.path.join(self.root, name))
        )
        self.file_frames: List[int] = []

        self.classes = LABELS

        # Load number of frames for every file
        for fn in self.data_files:
            with open(os.path.join(self.root, fn)) as f:
                header = f.readline().split()[-1].split("_")
                self.file_frames.append(int(header[-1]))  # last element - number_of_frames

    def _get_file_index(self, frame_indx) -> Tuple[int, int]:
        """Translate a global frame index into ``(file index, frame index in that file)``.

        Raises:
            IndexError: if ``frame_indx`` is negative or past the last frame.
                The original fell through and returned ``None``, and negative
                indices ended up addressing header lines.
        """
        if frame_indx < 0:
            raise IndexError(f"frame index {frame_indx} is negative")

        remaining = frame_indx
        for file_indx, number_of_frames in enumerate(self.file_frames):
            if remaining < number_of_frames:
                return file_indx, remaining
            remaining -= number_of_frames

        raise IndexError(f"frame index {frame_indx} out of range for {len(self)} frames")

    def __getitem__(self, indx):
        file_indx, line_indx = self._get_file_index(indx)
        action_file = os.path.join(self.root, self.data_files[file_indx])

        with open(action_file, "r") as f:
            data_str = f.read().rstrip('\n').split('\n')

        line_indx += self.HEADER_LINES  # first two header lines in the file
        frame = np.array(
            # strip() tolerates both ";" and "; " between joints.
            [triple.strip().split(", ") for triple in data_str[line_indx].split(";")],
            dtype=np.float32,
        )
        assert frame.shape == (self.NUMBER_OF_JOINTS, self.NUMBER_OF_AXES)

        # The action id is the second field of the key on the first header line.
        target = self.classes[int(data_str[0].split()[-1].split("_")[1])]

        if self.transforms:
            frame = self.transforms(frame)

        return frame, target

    def __len__(self) -> int:
        """Total number of frames over all files.

        Samples are frames, so the length has to be the frame total. Returning
        the file count made only the first ``len(data_files)`` frames reachable.
        """
        return sum(self.file_frames)

In [163]:
# Build the train and validation datasets from sibling folders of the
# cross-view split. Each gets its own ToTensor transform; the shape printed
# further down ([1, 3, 24, 25]) shows it delivers (axes, frames, joints).
train_dataset, test_dataset = (
    IterableMovementDataset(f"../data/cross-view/{split}", transforms=transforms.ToTensor())
    for split in ("train", "val")
)

In [164]:
# Loaders use the DataLoader defaults: explicit batch_size/shuffle arguments
# were tried and left disabled.
train_loader = DataLoader(train_dataset)
test_loader = DataLoader(test_dataset)

In [31]:
# Re-display the selected device.
device

device(type='cuda')

In [None]:
# set-up
#  - Adam optimizer *
#  - LR = 0.0005 *
#  - batch = 1 *
#  - L2 weight decay = 0.0001 *
#  - dropout = 0.5 *
#  - 200 epochs
#  - Embedding - 64 *
#  - Hidden-state - 1024 --> halved for Bi-LSTM *

In [127]:
def save_model(model, model_name: str, epoch: int):
    """Persist the model weights and remember which epoch they belong to.

    Writes ``model.state_dict()`` to ``<model_name>.pth`` and overwrites the
    ``last_checkpoint`` file in the working directory with ``epoch``.
    """
    weights_path = f"{model_name}.pth"
    torch.save(model.state_dict(), weights_path)

    with open("last_checkpoint", "w") as checkpoint_file:
        checkpoint_file.write(str(epoch))

    print(f"Model saved into: {weights_path}")

In [11]:
# Longest training sequence, measured along dim 2 of each batch.
# `sequence` and `labels` are deliberately left in the namespace: the
# exploratory cells below inspect the last batch.
max_size = 0
for i, (sequence, labels) in enumerate(train_loader, 1):
    max_size = max(max_size, sequence.size(2))

print(max_size)

741


In [13]:
# Shape of whatever batch `sequence` currently holds.
sequence.shape

torch.Size([1, 3, 24, 25])

In [33]:
# Size of the same batch when flattened into a single row.
sequence.reshape(1, -1).shape

torch.Size([1, 10725])

In [35]:
# Zero buffer with one row of 741 * 3 * 25 values; 741 is the longest
# sequence length printed by the scan above.
target = torch.zeros(1, 741 * 3 * 25)
target.size()

torch.Size([1, 55575])

In [37]:
# Copy the flattened batch into the front of the buffer; the rest stays zero.
# 10725 is hard-coded to the flattened size of the batch currently in `sequence`.
target[:, :10725] = sequence.reshape(1, -1)

In [39]:
# Inspect the boundary between the copied values and the zero padding.
target[:, 10723:10727]

tensor([[-0.1190, -0.1671,  0.0000,  0.0000]])

In [165]:
# Training the network
model = NewBiRNN(INPUT_SIZE, HIDDEN_SIZE, EMBEDDING_INPUT_SIZE, len(train_dataset.classes)).to(device)

# Resume from the final checkpoint of a previous run when one exists.
start_epoch = 0
if os.path.exists("model.pth"):
    # map_location lets a checkpoint written on the GPU load on a CPU-only machine.
    model.load_state_dict(torch.load('model.pth', map_location=device))
    with open("last_checkpoint", "r") as lf:
        # The file stores the index of the last completed epoch, so training
        # continues with the next one instead of repeating it.
        start_epoch = int(lf.read()) + 1
    print(f"Model loaded with epoch: {start_epoch}")
else:
    print("Pretrained model not found")

SAVE_CHECHPOINT = 50  # save an intermediate model every N epochs
PRINT_STEP = 999      # report the running average loss every N sequences
board_writer = SummaryWriter()

# The model ends in a sigmoid and the targets are one-hot vectors, hence BCELoss.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=L2_WEIGTH_DECAY
)

last_epoch = start_epoch + EPOCHS
total_iterations_per_epoch = len(train_loader)

# Train the model
for epoch in range(start_epoch, last_epoch):
    s_time = time.time()
    total_loss = 0.0

    # Make sure dropout is active, even if an evaluation cell left the model in eval mode.
    model.train()

    for i, (sequence, labels) in enumerate(train_loader, 1):
        # ToTensor delivers (1, axes, frames, joints). Move the axes last before
        # flattening so each row of INPUT_SIZE values is one complete frame
        # (25 joints x 3 axes). Reshaping the channels-first tensor directly
        # mixed coordinates from different frames.
        # NOTE(review): every frame becomes its own batch entry with sequence
        # length 1, so the LSTM sees no temporal context. Confirm this is intended.
        sequence = sequence.permute(0, 2, 3, 1).reshape(-1, 1, INPUT_SIZE).to(device)
        labels = labels.to(device)

        # forward pass (targets are float64, so the outputs are cast to match)
        outputs = model(sequence).double()

        # One copy of the sequence label per frame.
        labels = labels.repeat(outputs.size(0), 1)

        loss = criterion(outputs, labels)
        total_loss += loss.item()

        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % PRINT_STEP == 0:
            average_loss = total_loss / PRINT_STEP
            print(f"{i}/{len(train_loader)} -  Epoch [{epoch + 1}/{last_epoch}], average_loss: {round(average_loss, 6)}")
            total_loss = 0.0

            board_writer.add_scalar(
                'Average_Loss/train',
                average_loss,
                (epoch * total_iterations_per_epoch) + i
            )

    print(f"Evaluation time: {time.time() - s_time}s.")

    if (epoch + 1) % SAVE_CHECHPOINT == 0:
        save_model(model, f"model_{epoch}", epoch)

# Flush pending TensorBoard events before finishing.
board_writer.close()

save_model(model, "model", epoch)
print("DONE")

Pretrained model not found


RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:18

In [21]:
# Manual save of the current weights; relies on `epoch` left behind by the training cell.
save_model(model, "model", epoch)

In [130]:
def save_results(dict_results, total_records):
    """Dump the per-threshold counters and the record total to ``model_results.json``.

    The file holds one JSON object with the keys ``thresholds`` and
    ``total_records``.
    """
    payload = {
        "thresholds": dict_results,
        "total_records": total_records,
    }
    with open("model_results.json", "w") as results_file:
        json.dump(payload, results_file)

In [139]:
# Test model
# For every threshold 0.1 .. 1.0 count
#   "above"   - (frame, class) outputs that exceed the threshold,
#   "correct" - frames whose true-class output exceeds the threshold.
steps = 10
def_dict = {
    "correct": 0,
    "above": 0,
}
thresholds = [(round((1 / steps) * (i + 1), 4), def_dict.copy()) for i in range(steps)]
total = 0

# Switch dropout off for evaluation and restore the previous mode afterwards.
was_training = model.training
model.eval()

with torch.no_grad():
    for i, (sequence, labels) in enumerate(test_loader):
        # ToTensor delivers (1, axes, frames, joints). Move the axes last so
        # each row of INPUT_SIZE values is one complete frame.
        sequence = sequence.permute(0, 2, 3, 1).reshape(-1, 1, INPUT_SIZE).to(device)

        # Run the model first: the per-frame labels are sized from this batch's
        # outputs. Reading `outputs` before assigning it used the previous
        # batch's (or the training cell's) frame count.
        outputs = model(sequence)
        labels = torch.argmax(labels).repeat(outputs.size(0)).to(device)

        # Score of the true class for every frame, shape (frames, 1).
        true_class_scores = outputs.gather(1, labels.unsqueeze(1))

        for val_th, res_dict in thresholds:
            res_dict["above"] += (outputs > val_th).sum().item()
            # Correctly predicted
            res_dict["correct"] += (true_class_scores > val_th).sum().item()

        total += labels.size(0)

        if i % 49 == 0:
            print(f"Processed: [{i}/{len(test_dataset)}]")

model.train(was_training)

# Save dictionary results
save_results(thresholds, total)

Processed: [0/837]
Processed: [49/837]
Processed: [98/837]
Processed: [147/837]
Processed: [196/837]
Processed: [245/837]
Processed: [294/837]
Processed: [343/837]
Processed: [392/837]
Processed: [441/837]
Processed: [490/837]
Processed: [539/837]
Processed: [588/837]
Processed: [637/837]
Processed: [686/837]
Processed: [735/837]
Processed: [784/837]
Processed: [833/837]


In [150]:
# Number of outputs in the most recent batch that exceed 0.06.
(outputs.data > 0.06).sum().item()

21

In [140]:
# Manual re-save of the counters currently held in `thresholds` and `total`.
save_results(thresholds, total)

In [51]:
# Load results
# Restores the saved counters so the metrics below can be recomputed without
# re-running the model.
# NOTE(review): this reads from model_cv/, while save_results writes
# model_results.json to the working directory - confirm the file was moved there.
with open("model_cv/model_results.json", "r") as results_file:
    data = json.load(results_file)

thresholds, total = data["thresholds"], data["total_records"]

In [141]:
def _threshold_metrics(correct, above, total_records):
    """Return ``(precision, recall, f1)`` for one threshold.

    precision = correct / above and recall = correct / total_records.
    Any ratio with a zero denominator is reported as 0.0 instead of raising.
    """
    recall = correct / total_records if total_records > 0 else 0.0
    precision = correct / above if above > 0 else 0.0
    denominator = precision + recall
    f1 = 2 * (precision * recall) / denominator if denominator > 0 else 0.0
    return precision, recall, f1


# Calculation of evaluation metrics for every threshold.
# The last threshold (1.0) is skipped: sigmoid outputs never exceed it.
pr_points = []
for th, values in thresholds[:-1]:
    precision, recall, f1_score = _threshold_metrics(values["correct"], values["above"], total)
    pr_points.append((recall, precision))

    print("\n" + "-" * 15 + f"{th}" + "-" * 15)
    print(f"Test Precision: {round(100 * precision, 4)}%")
    print(f"Test Recall: {round(100 * recall, 4)}%")
    print(f"Test f1_score: {round(f1_score, 4)}")

# AP = sum_n (R_n - R_{n-1}) * P_n, walking the curve in the direction of
# growing recall, i.e. from the highest threshold to the lowest.
# `old_recall` has to survive between iterations; resetting it inside the loop
# reduced the sum to sum(recall * precision).
ap_score = 0
old_recall = 0
for recall, precision in reversed(pr_points):
    ap_score += (recall - old_recall) * precision
    old_recall = recall

# AP - score
print("-" * 32)
print("-" * 14 + " AP " + "-" * 14)
print(f"AP: {round(ap_score * 100, 4)}%")


---------------0.1---------------
Test Precision: 0%
Test Recall: 0.0%
Test f1_score: 0

---------------0.2---------------
Test Precision: 0%
Test Recall: 0.0%
Test f1_score: 0

---------------0.3---------------
Test Precision: 0%
Test Recall: 0.0%
Test f1_score: 0

---------------0.4---------------
Test Precision: 0%
Test Recall: 0.0%
Test f1_score: 0

---------------0.5---------------
Test Precision: 0%
Test Recall: 0.0%
Test f1_score: 0

---------------0.6---------------
Test Precision: 0%
Test Recall: 0.0%
Test f1_score: 0

---------------0.7---------------
Test Precision: 0%
Test Recall: 0.0%
Test f1_score: 0

---------------0.8---------------
Test Precision: 0%
Test Recall: 0.0%
Test f1_score: 0

---------------0.9---------------
Test Precision: 0%
Test Recall: 0.0%
Test f1_score: 0
--------------------------------
-------------- AP --------------
AP: 0.0%


In [None]:
# TODO
#  - evaluation metrics
#  - data format for training

In [None]:
####### Evaluation metrics

In [None]:
# Calculate precision
#  - Precision: the ratio of correctly annotated frames to all model-annotated frames on the test sequences
#  - correctly annotated frames (numerator)
#  - apply the threshold to every output in every frame --> the number of classes the frame was classified into (all model-annotated frames)