In [1]:
# Mount Google Drive at /content/drive so files stored there can be accessed by path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd drive/MyDrive/Dat_VBDI/Deep_learning_project/data_processing

/content/drive/MyDrive/Dat_VBDI/Deep_learning_project/data_processing


In [3]:
import pandas as pd
import numpy as np
import os
import glob
from os.path import join
import random

In [7]:
def gen_data(input_dir, output_dir, step, val, window=32, seed=None):
    '''
    Build shuffled sliding-window pose samples from per-video CSV files and
    save them as .npy arrays.

    For each action folder ('idle', 'walk', 'wave') under `input_dir`, every
    *.csv file is read without a header, its rows are grouped by the value in
    column 0, and each group is reduced to one skeleton with get_person.
    Every run of `window` skeletons taken `step` frames apart becomes one
    sample, labelled with the index of the action in the list above.

    input_dir: directory containing one sub-directory per action with *.csv files
    output_dir: directory receiving X_{name}_{step}.npy and y_{name}_{step}.npy,
        where name is 'val' or 'train'; created if it does not exist
    step: distance (in frames) between consecutive frames of one window, >= 1
    val: truthy -> files are named 'val', otherwise 'train'
    window: number of frames per sample (default 32)
    seed: optional seed for the shuffle; None keeps using the global `random`
        module state, exactly as before
    return: None (results are written to disk)
    raises: ValueError if step or window is smaller than 1
    '''
    if step < 1 or window < 1:
        raise ValueError('step and window must both be >= 1')

    actions = ['idle', 'walk', 'wave']
    poses = []
    labels = []
    for label, action in enumerate(actions):
        # sorted() keeps the sample order independent of filesystem listing order
        for vid in sorted(glob.glob(join(input_dir, action, '*.csv'))):
            df = pd.read_csv(vid, header=None)
            # One pass over the file: groups come out sorted by the column-0 value,
            # and each group still contains every column of its rows.
            frames = [get_person(rows.values) for _, rows in df.groupby(0)]

            # A window starting at i uses frames i, i+step, ..., i+(window-1)*step,
            # so the last valid start is len(frames) - 1 - (window-1)*step.
            n_windows = max(0, len(frames) - (window - 1) * step)
            for i in range(n_windows):
                poses.append(frames[i:i + window * step:step])
            labels += [label] * n_windows

    labels = np.array(labels)
    poses = np.array(poses)

    # Shuffle samples and labels with the same permutation.
    shuffler = random if seed is None else random.Random(seed)
    order = list(range(len(labels)))
    shuffler.shuffle(order)
    order = np.asarray(order, dtype=np.intp)

    labels = labels[order]
    poses = poses[order]

    name = 'val' if val else 'train'

    # np.save does not create missing directories.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    np.save(join(output_dir, f'X_{name}_{step}.npy'), poses)
    np.save(join(output_dir, f'y_{name}_{step}.npy'), labels)

In [6]:
def get_person(frame):
    '''
    Select the tallest skeleton in a frame and return its normalised keypoints.

    input: ndarray (n, 53) contains n skeletons in a single frame
    return: ndarray (12, 2) contains the tallest skeleton without head;
        column 0 holds the x values and column 1 the y values, each min-max
        scaled to [0, 1] independently of the other axis

    "Tallest" means the largest spread (max - min) over the y columns, which
    are read from every third column starting at column 2. The first 17
    columns are then dropped and, in what remains, y is every third value from
    offset 0 and x every third value from offset 1.
    NOTE(review): this looks like a [2 leading columns, (y, x, score) * 17]
    row layout -- confirm against the code that writes the CSV files.

    An axis whose values are all identical is returned as zeros instead of
    dividing by zero.
    '''

    # get y coordinates
    yy = frame[:, 2::3]

    # get heights
    heights = np.max(yy, 1) - np.min(yy, 1)

    # get the tallest skeleton without head
    tallest = np.argmax(heights)
    person = frame[tallest][17:]
    xx = person[1::3]
    yy = person[::3]

    # min max norm
    def norm(A):
        A_min = np.min(A)
        A_max = np.max(A)
        span = A_max - A_min
        if span == 0:
            # degenerate axis: avoid 0/0 -> NaN leaking into the dataset
            return np.zeros(np.shape(A), dtype=float)
        return (A - A_min) / span

    return np.vstack((norm(xx), norm(yy))).T

In [10]:
# Generate the windowed datasets for both splits and every frame stride.
SEED = 42
OUTPUT_DIR = 'data/train_data'

input_dirs = ['data/annotated_data/pose_csv/train', 'data/annotated_data/pose_csv/val']
vals = [False, True]
steps = [1, 2]

# gen_data shuffles through the global `random` module, so seeding it here makes
# reruns reproducible (as long as the input files are listed in the same order).
random.seed(SEED)

# np.save does not create missing directories.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for input_dir, val in zip(input_dirs, vals):
    for step in steps:
        print('processing', input_dir, val, step)
        gen_data(input_dir=input_dir, output_dir=OUTPUT_DIR, step=step, val=val)

processing data/annotated_data/pose_csv/train False 1
processing data/annotated_data/pose_csv/train False 2
processing data/annotated_data/pose_csv/val True 1
processing data/annotated_data/pose_csv/val True 2
