In [1]:
'''
Integrate all or selected npy files of estimated whole-body 133 keypoints
Reduce number of keypoints from 133 to a selected number ("graph reduction")
'''

import os
import sys
import argparse
import pickle
import numpy as np
from tqdm import tqdm
from natsort import natsorted


# Keypoint subsets, keyed by the number of joints kept (as a string).
# The indices address the 133 whole-body keypoints of the input files.
# NOTE(review): the hand ranges look like the COCO-WholeBody layout
# (91-111 left hand, 112-132 right hand) -- confirm against the pose estimator.
selected_joints = {
    '27': np.array(
        [0, 5, 6, 7, 8, 9, 10]                                   # body
        + [91, 95, 96, 99, 100, 103, 104, 107, 108, 111]         # presumably left hand
        + [112, 116, 117, 120, 121, 124, 125, 128, 129, 132]     # presumably right hand
    ),
}

max_frame = 150      # fixed number of frames stored per sample
num_channels = 3     # size of the per-keypoint channel axis; presumably (x, y, score) -- TODO confirm
max_body_true = 1    # size of the trailing axis of the output array; only index 0 is written

In [2]:
# Run configuration. NOTE(review): these are absolute, machine-specific paths.

# Directory that is listed for keypoint files and from which each sample is loaded.
data_path = '/hdd1/dataset/KETI_SignLanguage/Keypoints'
# Alternative input directory (AUTSL):
# data_path = '/hdd1/dataset/AUTSL/train_npy'
# Directory containing the label CSV (label_KETI_<part>_all.csv).
label_path = '../preprocess_label'
# Alternative label directory (AUTSL):
# label_path = '/hdd1/dataset/AUTSL'
# Directory that receives the label pickle and the joint data array.
out_path = '/hdd1/dataset/KETI_SignLanguage/SLGCN-Data'
# Split name; it is embedded in the label CSV name and in both output file names.
part = 'val'
# Key into selected_joints that picks the keypoint subset.
config = '27'

In [3]:
# Label info: read the CSV for the chosen split, one "<sample_name>,<label>" row per sample.
labels = []
sample_names = []
selected = selected_joints[config]  # keypoint indices kept for this configuration
num_joints = len(selected)

label_file_name = f'label_KETI_{part}_all.csv'

# `with` closes the handle even if a row fails to parse.
with open(os.path.join(label_path, label_file_name), 'r', encoding='utf-8') as label_file:
    for line in label_file:
        line = line.strip()
        if not line:
            # tolerate blank lines (e.g. a trailing newline at the end of the file)
            continue
        fields = line.split(',')

        sample_names.append(fields[0])
        labels.append(int(fields[1]))

In [4]:
# Preview the first ten sample names alongside their integer labels.
sample_names[:10], labels[:10]

(['KETI_SL_0000000354',
  'KETI_SL_0000000112',
  'KETI_SL_0000001061',
  'KETI_SL_0000001169',
  'KETI_SL_0000001595',
  'KETI_SL_0000001311',
  'KETI_SL_0000002109',
  'KETI_SL_0000001490',
  'KETI_SL_0000000292',
  'KETI_SL_0000001656'],
 [351, 111, 224, 331, 338, 54, 109, 234, 287, 400])

In [5]:
# Per-sample frame counts, appended while the keypoint files are read.
frames = list()

# Zero-initialised output buffer laid out as (sample, frame, joint, channel, body).
fp = np.zeros(
    shape=(len(labels), max_frame, num_joints, num_channels, max_body_true),
    dtype=np.float32,
)

In [6]:
# Naturally sorted names of the keypoint files in data_path.
# Only '.npy' entries are kept so that stray files or sub-directories can never be
# picked up by the name lookup and handed to np.load later on.
npy_list = natsorted(
    [name for name in os.listdir(data_path) if name.endswith('.npy')]
)
print(len(npy_list))

41920


In [7]:
# Sanity check of the listing: print the full path of the first file and the sample
# id (text before the first '.') of the file 100 positions from the end.
# Direct indexing replaces a full pass over the list; the guards keep the
# "print nothing" behaviour for listings that are empty or shorter than 100 entries.
if npy_list:
    print(os.path.join(data_path, npy_list[0]))
if len(npy_list) >= 100:
    print(npy_list[-100].split('.')[0])

41920it [00:00, 3212123.64it/s]

/hdd1/dataset/KETI_SignLanguage/Keypoints/KETI_SL_0000000001.avi.npy
KETI_SL_0000043081





In [8]:
# Preview the first ten file names of the sorted listing.
npy_list[:10]

['KETI_SL_0000000001.avi.npy',
 'KETI_SL_0000000002.avi.npy',
 'KETI_SL_0000000003.avi.npy',
 'KETI_SL_0000000004.avi.npy',
 'KETI_SL_0000000005.avi.npy',
 'KETI_SL_0000000006.avi.npy',
 'KETI_SL_0000000007.avi.npy',
 'KETI_SL_0000000008.avi.npy',
 'KETI_SL_0000000009.avi.npy',
 'KETI_SL_0000000010.avi.npy']

In [9]:
def partial(lst, query):
    """Return the entries of ``lst`` that contain ``query``, in their original order.

    Containment is tested with ``query in entry``, so for strings this is a
    substring match. An empty list is returned when nothing matches.
    """
    matches = []
    for entry in lst:
        if query in entry:
            matches.append(entry)
    return matches

# Spot-check the lookup: list the file name(s) that contain one sample id.
query = 'KETI_SL_0000020960'

partial(npy_list, query)

['KETI_SL_0000020960.mp4.npy']

In [10]:
def _fit_to_length(skel, target_len):
    """Return exactly ``target_len`` frames of ``skel`` (frames on axis 0).

    Sequences with at least ``target_len`` frames are cut after ``target_len``
    frames; shorter ones are extended by replaying the sequence from its first
    frame as often as needed. ``skel`` must contain at least one frame.
    """
    # Output frame t is input frame (t mod number_of_frames).
    return skel[np.arange(target_len) % skel.shape[0]]


# Index the keypoint files by sample id (text before the first '.') once, instead of
# scanning the whole listing for every sample. setdefault keeps the first file in
# listing order when several files share an id.
npy_by_name = {}
for name in npy_list:
    npy_by_name.setdefault(name.split('.')[0], name)

file_counter = 0
# tqdm wraps the list itself so that it knows the total and can show real progress.
for sample_name in tqdm(sample_names):
    npy_file = npy_by_name.get(sample_name)
    if npy_file is None:
        # Fall back to a substring search for names that are not an exact sample id.
        npy_file = next((name for name in npy_list if sample_name in name), None)
    if npy_file is None:
        raise FileNotFoundError(
            f'no keypoint file found for sample {sample_name!r} in {data_path}')

    skel = np.load(os.path.join(data_path, npy_file))
    skel = skel[:, selected, :]  # frame, joints, channels
    num_frames = skel.shape[0]
    if num_frames == 0:
        # An empty sequence cannot be repeated to fill the buffer.
        raise ValueError(f'{npy_file} contains no frames')
    frames.append(num_frames)

    # Shorter sequences are padded by repeating from the beginning, longer ones are cut.
    fp[file_counter, :, :, :, 0] = _fit_to_length(skel, max_frame)

    file_counter += 1

print(file_counter)

8380it [01:16, 109.85it/s]

8380





In [11]:
# Expected layout: (num_samples, max_frame, num_joints, num_channels, max_body_true).
fp.shape

(8380, 150, 27, 3, 1)

In [12]:
# Name of the label pickle for the current split; displayed as a quick check.
pkl_file_name = f'{part}_label_all.pkl'
pkl_file_name

'val_label_all.pkl'

In [13]:
# Make sure the output directory exists so the write cannot fail with
# FileNotFoundError after the expensive processing above.
os.makedirs(out_path, exist_ok=True)

# Store the labels as a (sample_names, labels) tuple of two parallel lists.
with open(os.path.join(out_path, pkl_file_name), 'wb') as f:
    pickle.dump((sample_names, labels), f)

In [14]:
# Reorder the axes from (sample, frame, joint, channel, body)
# to (sample, channel, frame, joint, body).
# NOTE: this rebinds `fp`, so running the cell a second time permutes the axes again.
fp = fp.transpose(0, 3, 1, 2, 4)
print(fp.shape)

(8380, 3, 150, 27, 1)


In [15]:
# Name of the joint data file for the current split; displayed as a quick check.
npy_file_name = f'{part}_data_joint_all.npy'
npy_file_name

'val_data_joint_all.npy'

In [16]:
# Write the current `fp` array to out_path/npy_file_name in NumPy .npy format.
np.save(os.path.join(out_path, npy_file_name), fp)