In [2]:
# imports
import os
import random
import numpy as np
import datetime
from typing import List, Tuple

In [2]:
# Show the current local time as an ISO 8601 string (the cell's displayed value).
datetime.datetime.now().isoformat()

'2020-05-03T11:46:06.847512'

In [16]:
# Name of the input data file; it is later joined with "../data" to build the
# path that is read, and its stem is reused in the export folder name.
FILE_NAME = "actions-single-subject-all-POS.data"

In [17]:
def read_file(file_path: str):
    """Lazily iterate over the lines of a text file.

    Every line is yielded with its trailing newline removed. Once the file is
    exhausted, a single ``None`` is yielded as an end-of-input sentinel so that
    consumers calling ``next()`` can detect the end without catching
    ``StopIteration``.

    Args:
        file_path: Path of the text file to read.

    Yields:
        Each line as ``str`` (without the trailing newline), then ``None``.
    """
    with open(file_path, "r") as source:
        yield from (raw_line.rstrip("\n") for raw_line in source)
    # End-of-input sentinel.
    yield None

In [18]:
class Action:
    """One labelled action: header fields plus its per-frame joint coordinates.

    The header fields arrive as strings (as parsed from the file) and the
    numeric ones are converted to ``int``. Frames are collected one by one with
    :meth:`add_frame` and can then be frozen into a single array with
    :meth:`to_numpy`.

    Attributes:
        seq_id: Sequence identifier, kept as a string.
        label: Integer action label.
        offset: Integer start offset taken from the header.
        length: Number of frames declared in the header.
    """

    NUMBER_OF_JOINTS = 25
    NUMBER_OF_AXES = 3

    def __init__(self, sequence_id: str, label: str, start_offset: str, length: str):
        self.seq_id = sequence_id
        self.label = int(label)
        self.offset = int(start_offset)
        self.length = int(length)

        # A list while frames are being collected; an ndarray after to_numpy().
        self._frames = []

    def add_frame(self, new_frame):
        """Append one frame of shape ``(NUMBER_OF_JOINTS, NUMBER_OF_AXES)``.

        Raises:
            AssertionError: If ``new_frame`` does not have the expected shape.
        """
        assert new_frame.shape == (self.NUMBER_OF_JOINTS, self.NUMBER_OF_AXES)
        # After to_numpy() the storage is an ndarray, which has no append();
        # switch back to a list so frames can still be added.
        if isinstance(self._frames, np.ndarray):
            self._frames = list(self._frames)
        self._frames.append(new_frame)

    def to_numpy(self):
        """Stack the collected frames into one ``(length, joints, axes)`` array.

        The stored frames are only replaced when the stacked array has the
        expected shape, so a failed check leaves the object unchanged.

        Raises:
            AssertionError: If the number of collected frames does not match
                ``length`` declared in the header.
        """
        stacked = np.array(self._frames)
        assert stacked.shape == (self.length, self.NUMBER_OF_JOINTS, self.NUMBER_OF_AXES)
        self._frames = stacked

    def get_header(self) -> str:
        """Return the header key ``<seq_id>_<label>_<offset>_<length>``."""
        return f"{self.seq_id}_{self.label}_{self.offset}_{self.length}"

    def __len__(self) -> int:
        """Return the number of frames declared in the header."""
        return self.length

    def __str__(self) -> str:
        # np.shape works for both the list and the ndarray storage, so the
        # object can be printed before to_numpy() has been called.
        return f"<Action {self.get_header()} {np.shape(self._frames)}/>"

In [19]:
def read_action(file_reader, action_header: str) -> Tuple[Action, List[str]]:
    """Read one action (two header lines plus its frames) from a line iterator.

    Args:
        file_reader: Iterator over the remaining lines of the input. It may
            signal the end of input either by yielding ``None`` or by being
            exhausted.
        action_header: The already-consumed first header line. Its last
            whitespace-separated token must be
            ``<seq_id>_<label>_<offset>_<length>``.

    Returns:
        A tuple ``(action, lines)`` where ``action`` holds the parsed frames
        and ``lines`` contains every raw line of the action, both headers
        included, in input order.

    Raises:
        ValueError: If the input ends before all declared lines were read.
        AssertionError: If the frame count in the second header differs from
            the length encoded in the first header.
    """
    action_lines = [action_header]

    # First header: split from the right so that a sequence id which itself
    # contains underscores still yields exactly four fields.
    action_key = action_header.split()[-1]
    new_action = Action(*action_key.rsplit("_", 3))

    # Second header: the frame count is the first ';'-separated field.
    second_header = next(file_reader, None)
    if second_header is None:
        raise ValueError(
            f"Unexpected end of input: missing second header for action {action_key!r}"
        )
    number_of_frames = int(second_header.split(";")[0])

    action_lines.append(second_header)
    assert len(new_action) == number_of_frames

    # Read all frames: each line holds ';'-separated joints, and each joint
    # is a ', '-separated coordinate triple.
    for frame_no in range(number_of_frames):
        line = next(file_reader, None)
        if line is None:
            raise ValueError(
                f"Unexpected end of input: action {action_key!r} declares "
                f"{number_of_frames} frames but only {frame_no} were found"
            )
        frame = np.array([triple.split(", ") for triple in line.split(";")], dtype=np.float32)
        new_action.add_frame(frame)
        action_lines.append(line)

    new_action.to_numpy()
    return new_action, action_lines

In [20]:
# Export directory: ../data/exports/<input file stem>_<current ISO timestamp>.
export_folder = os.path.join(
    "..", "data", "exports",
    f"{os.path.splitext(FILE_NAME)[0]}_{datetime.datetime.now().isoformat()}"
)
# Called without exist_ok, so this raises if the directory already exists.
os.makedirs(export_folder)

# Lazy line reader over the input file.
file_gen = read_file(os.path.join("..", "data", FILE_NAME))

actions = []
new_action = None  # NOTE(review): never read in this cell - looks like a leftover.

line = next(file_gen)
# The loop stops at the first falsy line: the None end-of-input sentinel,
# but also an empty line. NOTE(review): confirm the input has no blank lines.
while line:
    # `line` is the first header of an action; read_action consumes the rest of it.
    action, action_text = read_action(file_gen, action_header=line)
    actions.append(action)
    
    # Write the raw lines of this action to <export_folder>/<header>.data
    # (joined with newlines, no trailing newline).
    with open(os.path.join(export_folder, action.get_header() + ".data"), "w") as f:
        f.write('\n'.join(action_text))
    
    line = next(file_gen)
    # Progress message every 500 loaded actions.
    if len(actions) % 500 == 0:
        print(f"Loaded {len(actions)} actions")

Loaded 500 actions
Loaded 1000 actions
Loaded 1500 actions
Loaded 2000 actions
Loaded 2500 actions
Loaded 3000 actions
Loaded 3500 actions
Loaded 4000 actions
Loaded 4500 actions
Loaded 5000 actions
Loaded 5500 actions
Loaded 6000 actions
Loaded 6500 actions
Loaded 7000 actions
Loaded 7500 actions
Loaded 8000 actions
Loaded 8500 actions
Loaded 9000 actions
Loaded 9500 actions
Loaded 10000 actions
Loaded 10500 actions
Loaded 11000 actions
Loaded 11500 actions
Loaded 12000 actions
Loaded 12500 actions
Loaded 13000 actions
Loaded 13500 actions
Loaded 14000 actions
Loaded 14500 actions
Loaded 15000 actions
Loaded 15500 actions
Loaded 16000 actions
Loaded 16500 actions
Loaded 17000 actions
Loaded 17500 actions
Loaded 18000 actions
Loaded 18500 actions
Loaded 19000 actions
Loaded 19500 actions


In [23]:
# Collect the integer label of every loaded action and show how many there are.
ids = [a.label for a in actions]
len(ids)

19820

In [24]:
# Distinct labels that occur in the loaded actions.
set(ids)

{1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 13,
 15,
 17,
 19,
 20,
 22,
 23,
 25,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51}

In [26]:
# Map every distinct action label to a contiguous class index 0..n-1.
# A set has no guaranteed iteration order, so the labels are sorted first to
# make the label -> index assignment deterministic across runs and interpreters.
label_dict = dict()
for i, label in enumerate(sorted(set(ids))):
    label_dict[label] = i
label_dict

{1: 0,
 2: 1,
 3: 2,
 4: 3,
 5: 4,
 6: 5,
 7: 6,
 8: 7,
 9: 8,
 10: 9,
 11: 10,
 13: 11,
 15: 12,
 17: 13,
 19: 14,
 20: 15,
 22: 16,
 23: 17,
 25: 18,
 28: 19,
 29: 20,
 30: 21,
 31: 22,
 32: 23,
 33: 24,
 34: 25,
 35: 26,
 36: 27,
 37: 28,
 38: 29,
 39: 30,
 40: 31,
 41: 32,
 42: 33,
 43: 34,
 44: 35,
 45: 36,
 46: 37,
 47: 38,
 48: 39,
 49: 40,
 50: 41,
 51: 42}

In [67]:
# Report how many actions were loaded.
print(f"Total number of actions: {len(actions)}")

Total number of actions: 19820


In [4]:
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

In [48]:
# Torch dataloader
class MovementsDataset(Dataset):
    """Frame-level dataset over a directory of exported action files.

    Every file in ``root`` is expected to start with a header line whose last
    whitespace-separated token is ``<seq_id>_<label>_<offset>_<length>``,
    followed by a second header line and then one line per frame. A sample is
    a single frame; samples are numbered consecutively across the files taken
    in sorted file-name order.

    Args:
        root: Directory containing the action files.
        transforms: Optional callable applied to each frame array before it
            is returned.
    """

    NUMBER_OF_JOINTS = 25
    NUMBER_OF_AXES = 3

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms

        self.data_files = list(sorted(os.listdir(self.root)))
        self.file_frames: List[int] = []

        # Load number of frames for every file (last field of the header key).
        for fn in self.data_files:
            with open(os.path.join(self.root, fn)) as f:
                header = f.readline()
                self.file_frames.append(
                    int(header.split()[-1].split("_")[-1])
                )

    def _get_file_index(self, frame_indx) -> Tuple[int, int]:
        """Translate a global frame index into ``(file index, frame index in file)``.

        Raises:
            IndexError: If ``frame_indx`` is negative or not smaller than the
                total number of frames.
        """
        if frame_indx < 0:
            raise IndexError(f"frame index {frame_indx} out of range")

        remaining = frame_indx
        for i, nof in enumerate(self.file_frames):
            if remaining < nof:
                return i, remaining
            remaining -= nof

        # Ran past the last file: signal it the way sequences do, instead of
        # implicitly returning None (which fails later with a TypeError).
        raise IndexError(f"frame index {frame_indx} out of range")

    def __getitem__(self, indx):
        """Return ``(frame, target)`` for the global frame index ``indx``.

        ``frame`` is a float32 array of shape ``(NUMBER_OF_JOINTS,
        NUMBER_OF_AXES)`` (or whatever ``transforms`` returns for it) and
        ``target`` is the integer label read from the file header. Negative
        indices count from the end.

        Raises:
            IndexError: If ``indx`` is out of range.
        """
        if indx < 0:
            indx += len(self)
        file_indx, line_indx = self._get_file_index(indx)
        action_file = os.path.join(self.root, self.data_files[file_indx])

        with open(action_file, "r") as f:
            data_str = f.read().rstrip('\n').split('\n')

        line_indx += 2  # first two header lines in the file
        frame = np.array([triple.split(", ") for triple in data_str[line_indx].split(";")], dtype=np.float32)
        assert frame.shape == (self.NUMBER_OF_JOINTS, self.NUMBER_OF_AXES)

        # The label is the second field of the header key.
        target = int(data_str[0].split()[-1].split("_")[1])

        if self.transforms:
            frame = self.transforms(frame)

        return frame, target

    def __len__(self) -> int:
        # Samples are frames, not files: the length must cover every index
        # that __getitem__ accepts, otherwise samplers only ever draw the
        # first len(data_files) frames.
        return sum(self.file_frames)

In [50]:
# Dataset over a previously exported folder; ToTensor is passed as the
# per-frame transform.
md = MovementsDataset(
    "../data/exports/actions-single-subject-all-POS_2020-04-29T15:15:27.384264",
    transforms=transforms.ToTensor()
)

In [44]:
# Fetch a single sample by global frame index and inspect the frame's shape
# and its label.
a, b = md[143 + 24]
print(a.shape)
print(b)

24 - 1
torch.Size([1, 25, 3])
11


In [37]:
# Frame count recorded for the second file of the dataset.
md.file_frames[1]

25

In [39]:
# File name of the second file of the dataset.
fn = md.data_files[1]
fn

'0002-L_11_3677_25.data'

In [40]:
# Read the file named by `fn` directly from the export folder and show its
# first line.
with open(os.path.join("../data/exports/actions-single-subject-all-POS_2020-04-29T15:15:27.384264", fn), "r") as f:
    data = f.read().split("\n")
data[0]

'#objectKey messif.objects.keys.AbstractObjectKey 0002-L_11_3677_25'

In [52]:
# Loader over the dataset: batches of 128 samples, with shuffling enabled.
tl = DataLoader(md, batch_size=128, shuffle=True)

In [54]:
# Inspect only the first batch: print the sizes of both tensors, then stop.
for i, (images, labels) in enumerate(tl):
    print(images.size())
    print(labels.size())
    break

torch.Size([128, 1, 25, 3])
torch.Size([128])


In [15]:
# Split the exported action files into train/test folders (80/20).
input_folder = "../data/exports/actions-single-subject-all-POS_2020-04-29T15:15:27.384264"

# os.listdir returns entries in arbitrary order, so sort before the seeded
# shuffle; otherwise the same seed can give a different split on another
# machine or file system.
data_files = sorted(os.listdir(input_folder))
random.seed(42)
random.shuffle(data_files)

train_size, test_size = 0.80, 0.20
assert train_size + test_size == 1

# The first `split_indx` shuffled files go to train, the rest to test.
split_indx = int(train_size * len(data_files))

train_files = data_files[:split_indx]
test_files = data_files[split_indx:]

print(f"Splited {len(data_files)} files ({train_size * 100}/{test_size * 100})\n  Train: {len(train_files)}\n  Test: {len(test_files)}")

# Sibling folders next to the input folder; os.mkdir raises if they already exist.
export_folder_train = input_folder + "-train"
export_folder_test = input_folder + "-test"
os.mkdir(export_folder_train)
os.mkdir(export_folder_test)

# Files are moved (not copied): after this cell the input folder no longer
# contains them, so the cell cannot simply be re-run.
for file_name in train_files:
    os.rename(
        os.path.join(input_folder, file_name),
        os.path.join(export_folder_train, file_name)
    )

for file_name in test_files:
    os.rename(
        os.path.join(input_folder, file_name),
        os.path.join(export_folder_test, file_name)
    )

print("DONE")

Splited 19820 files (80.0/20.0)
  Train: 15856
  Test: 3964
DONE


In [6]:
# Open one file from the cross-subject training folder and split it into lines.
# NOTE(review): os.listdir order is arbitrary, so "[0]" is not a fixed file.
folder = "../data/exports/2020-05-08T12:23:23.327925/cross-subject/train"
file_example = os.listdir(folder)[0]
with open(os.path.join(folder, file_example), "r") as df:
    data = df.read().rstrip("\n").split("\n")

In [8]:
# First line of the example file.
data[0]

'#objectKey messif.objects.keys.AbstractObjectKey 0238-L_5_2599_114'