In [1]:
import numpy as np
import os
import os.path as osp
import sys
import time
from tqdm import tqdm
import pickle
from os.path import join

In [2]:
# Root directory that holds the raw '*.skeleton' files.
# Set the NTU_SKELETON_DIR environment variable to point at the dataset on
# another machine; when it is unset the previous hard-coded location is used.
data_path = os.environ.get('NTU_SKELETON_DIR', '/Users/thomas/Downloads/nturgb+d_skeletons')

In [3]:
# Names of every '.skeleton' file found directly inside the data directory,
# in alphabetical order (other directory entries are ignored).
files = sorted(name for name in os.listdir(data_path) if name.endswith('.skeleton'))

In [4]:
def normalize_frames(X):
    """Shift every frame so that its first joint (the hip) sits at (0, 0, 0).

    For each frame, the first three values of row 0 are subtracted from the
    first three columns of every row; the remaining four columns are left
    untouched (a zero offset is subtracted from them).

    Args:
        X: Array of frames, modified in place. Each frame must be a 2-D array
            whose rows hold exactly 7 values, because the offset that is
            broadcast against the frame has 3 + 4 entries.

    Returns:
        The same object ``X`` after the in-place shift, so the result can be
        assigned directly (``X[p] = normalize_frames(...)``).
    """
    # Set hip to 0,0,0
    for frame in X:
        # np.concatenate copies the hip coordinates, so the offset stays valid
        # while `frame` (a view into X) is being overwritten.
        hip_offset = np.concatenate([frame[0][:3], np.zeros(4)])
        frame -= hip_offset
    # Without this return, callers that assign the result would store None.
    return X

In [5]:
# Load the parsed skeleton data `X` from the local cache 'X.pkl'; if that fails,
# rebuild it from the raw '.skeleton' files and write the cache.
#
# X maps a file name to a float16 array built by stacking that file's frames.
# Files that cannot be parsed into equally-shaped frames are left out of X.
#
# NOTE: pickle.load can execute arbitrary code. Only load an 'X.pkl' that was
# written by this notebook.
X = {}
try:
    print('Attempting to load X from pickle')
    with open('X.pkl', 'rb') as f:
        X = pickle.load(f)
    print('X loaded from pickle')
except Exception:
    # Missing or unreadable cache. `except Exception` keeps the best-effort
    # fallback but lets KeyboardInterrupt stop the cell instead of silently
    # kicking off a long regeneration.
    print('Could not load X and Y, generating them now')
    needs_generation = True
else:
    needs_generation = False

if needs_generation:
    # Get stats for each file based on name (fixed-width slices of the file name)
    files_ = [
        {'file': file,
         's': file[0:4],
         'c': file[4:8],
         'p': file[8:12],
         'r': file[12:16],
         'a': file[16:20]}
        for file in files
    ]

    # Generate X
    for file_ in tqdm(files_, desc='Files Parsed', position=0):
        file = join(data_path, file_['file'])
        try:
            # The context manager closes the handle as soon as the file is read.
            with open(file, 'r') as handle:
                lines = [line.rstrip('\n') for line in handle]
            # Line 0 is the frame count; line 1 is the body count of the first frame.
            frames_count = int(lines[0])
            first_body_count = lines[1]
        except UnicodeDecodeError:  # not a text file, e.g. .DS_Store
            print('UnicodeDecodeError: ', file)
            continue
        except (IndexError, ValueError):
            # Empty file, missing second line, or non-numeric frame count:
            # skip this file rather than aborting the whole run.
            print('Malformed header, skipping: ', file)
            continue
        file_['frames'] = frames_count

        # Only parse files whose first frame reports exactly one or two bodies
        if first_body_count not in ['1', '2']:
            continue

        # Parse the file. `cursor` is the index of the next unread line, which
        # avoids the quadratic cost of repeatedly popping from the list front.
        frames = []
        cursor = 1
        for _frame in tqdm(range(frames_count), desc='Frames Parsed', position=1, leave=False):
            try:
                # Rows of the current frame, one per joint (all bodies concatenated)
                d = []
                # Get actor count
                actors = int(lines[cursor])
                cursor += 1

                for _body in range(actors):
                    # Skip the actor info line
                    cursor += 1

                    # Get joint count
                    joint_count = int(lines[cursor])
                    cursor += 1

                    # Get joint info
                    for _joint in range(joint_count):
                        joint = lines[cursor].split(' ')
                        cursor += 1
                        # Keep columns 0-2 and 7-10 (presumably x, y, z and the
                        # four orientation values - verify against the dataset format)
                        d.append(joint[0:3] + joint[7:11])

                # Convert to numpy array and keep the frame
                frames.append(np.array(d, dtype=np.float16))
            except Exception:
                # Truncated or malformed frame: keep the frames parsed so far
                # and stop reading this file.
                break

        if not frames:
            continue

        # Stack the frames and normalize them. Stacking raises when frames have
        # different shapes (e.g. the body count changes mid-file); such files
        # are dropped.
        p = file_['file']
        try:
            stacked = np.array(frames, dtype=np.float16)
            normalized = normalize_frames(stacked)
        except Exception:
            continue
        # normalize_frames shifts `stacked` in place; store the array itself when
        # the function does not return one, so X never ends up holding None.
        X[p] = stacked if normalized is None else normalized

    print('X Generated, saving to pickle...')

    # Save the data
    with open('X.pkl', 'wb') as f:
        pickle.dump(X, f)

    print('X Saved to pickle')

Attempting to load X from pickle
Could not load X and Y, generating them now


Files Parsed: 100%|██████████| 114480/114480 [16:54<00:00, 112.83it/s]


X Generated, saving to pickle...
X Saved to pickle


In [6]:
# Decode the integer fields stored at fixed character offsets of each file name.
# All five lists follow the iteration order of X, so position i refers to the
# same file in every list.
setup = [int(name[1:4]) for name in X]
camera = [int(name[5:8]) for name in X]
performer = [int(name[9:12]) for name in X]
replication = [int(name[13:16]) for name in X]
label = [int(name[17:20]) for name in X]

In [7]:
# Files present in X are usable; every other listed '.skeleton' file is not in X.
good_files = set(X)
bad_files = set(files).difference(good_files)

In [12]:
# Turn both collections into alphabetically sorted lists
good_files = sorted(good_files)
bad_files = sorted(bad_files)

In [None]:
# remove .skeleton from file names
good_files = [f.split('.')[0] for f in good_files]
bad_files = [f.split('.')[0] for f in bad_files]

In [10]:
# (number of files missing from X, number of files present in X)
(len(bad_files), len(good_files))

(3734, 110746)

In [13]:
# Output files, each holding one value per line:
#   camera.txt, label.txt, performer.txt, replication.txt, setup.txt,
#   samples_with_missing_skeletons.txt, skes_available_name.txt

def _write_lines(path, values):
    """Write every item of `values` to `path` on its own line, replacing the file."""
    with open(path, 'w') as out:
        for value in values:
            out.write(str(value) + '\n')


# Save the data to the text files listed above
for path, values in [
    ('camera.txt', camera),
    ('label.txt', label),
    ('performer.txt', performer),
    ('replication.txt', replication),
    ('setup.txt', setup),
    ('samples_with_missing_skeletons.txt', bad_files),
    ('skes_available_name.txt', good_files),
]:
    _write_lines(path, values)