In [None]:
import pandas as pd
import numpy as np
import os

import plotly.express as px

In [None]:
# Name of every tracked keypoint. The position of a name in this list is the
# row index of that keypoint in the keypoint arrays used below.
keypoint_labels = [
    # 0-3: tracked devices
    'headset', 'headset fwd', 'left controller', 'right controller',
    # 4-5: head and neck
    'head', 'neck',
    # 6-9: arms
    'left shoulder', 'right shoulder', 'left elbow', 'right elbow',
    # 10-17: legs
    'left hip', 'right hip', 'left knee', 'right knee',
    'left foot', 'right foot', 'left toes', 'right toes',
    # 18
    'scale',
]

IMG_SIZE = 256 # size of images on disk

# Folder holding one sub-folder per dataset; it is looked up as a sibling of
# the current working directory.
all_datasets = os.path.join(os.getcwd(), os.pardir, 'datasets')

# os.listdir returns entries in arbitrary order and also lists plain files,
# so keep only directories and sort them: indexing into `datasets` then picks
# the same dataset on every run and every machine.
datasets = sorted(
    entry for entry in os.listdir(all_datasets)
    if os.path.isdir(os.path.join(all_datasets, entry))
)
print(datasets)

In [None]:
# Choose which dataset folder to process; change the index to pick another
# entry from the list printed above.
if not datasets:
    raise FileNotFoundError(f"no datasets found in {all_datasets}")
selected_dataset = datasets[0]

In [None]:
def process_dataset(loc):
    """Return the paths of the capture files stored in a dataset folder.

    The first entry of ``loc`` (in sorted order) whose name contains
    "Dataset" is taken as the capture folder; every entry inside it whose
    name contains "captures" is returned.

    Args:
        loc: path of the dataset folder.

    Returns:
        List of capture file paths, sorted by file name so that the order
        (and therefore the row order of anything built from it) does not
        depend on the arbitrary ordering of ``os.listdir``.

    Raises:
        IndexError: if ``loc`` has no entry whose name contains "Dataset".
    """
    entries = sorted(os.listdir(loc))
    capdata_loc = os.path.join(loc, [x for x in entries if "Dataset" in x][0])
    return [
        os.path.join(capdata_loc, x)
        for x in sorted(os.listdir(capdata_loc))
        if "captures" in x
    ]

# https://stackoverflow.com/questions/48265646/rotation-of-a-vector-python
def unit_vector(vector):
    """Return ``vector`` divided by its norm (as given by np.linalg.norm)."""
    magnitude = np.linalg.norm(vector)
    return vector / magnitude

def angle_between(v1, v2):
    """Return the unsigned angle between ``v1`` and ``v2`` (result of np.arccos, radians)."""
    cosine = np.dot(unit_vector(v1), unit_vector(v2))
    # Clamp first: rounding can push the cosine just outside arccos' [-1, 1] domain.
    clamped = np.clip(cosine, -1.0, 1.0)
    return np.arccos(clamped)

def x_rotation(vector,theta):
    """Rotate 3-D vector(s) around the x-axis by ``theta`` radians.

    Args:
        vector: a single vector of shape (3,) or an array-like of row
            vectors of shape (N, 3).
        theta: rotation angle in radians.

    Returns:
        numpy array with the same shape as ``vector`` holding the rotated
        vector(s).
    """
    c, s = np.cos(theta), np.sin(theta)
    R = np.array([[1, 0, 0], [0, c, -s], [0, s, c]])
    # v @ R.T applies R to every row at once and also works for a lone (3,)
    # vector, which the previous per-row loop turned into a (3, 3, 3) array.
    return np.asarray(vector) @ R.T

def y_rotation(vector,theta):
    """Rotate 3-D vector(s) around the y-axis by ``theta`` radians.

    Args:
        vector: a single vector of shape (3,) or an array-like of row
            vectors of shape (N, 3).
        theta: rotation angle in radians.

    Returns:
        numpy array with the same shape as ``vector`` holding the rotated
        vector(s).
    """
    c, s = np.cos(theta), np.sin(theta)
    R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]])
    # v @ R.T applies R to every row at once and also works for a lone (3,)
    # vector, which the previous per-row loop turned into a (3, 3, 3) array.
    return np.asarray(vector) @ R.T

def z_rotation(vector,theta):
    """Rotate 3-D vector(s) around the z-axis by ``theta`` radians.

    Args:
        vector: a single vector of shape (3,) or an array-like of row
            vectors of shape (N, 3).
        theta: rotation angle in radians.

    Returns:
        numpy array with the same shape as ``vector`` holding the rotated
        vector(s).
    """
    c, s = np.cos(theta), np.sin(theta)
    R = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
    # v @ R.T applies R to every row at once and also works for a lone (3,)
    # vector, which the previous per-row loop turned into a (3, 3, 3) array.
    return np.asarray(vector) @ R.T


# assume keypoints are headset relative 3D coordinates
# return normalized keypoints, ie, each keypoint vector relative to its parent joint, normalized, with length also returned
def keypoints_to_normalized(keypoints):
    """Express each keypoint as a unit direction from its parent joint plus a length.

    Args:
        keypoints: array-like with one row of (x, y, z) per keypoint. Rows
            up to index 17 are accessed, so at least 18 rows are required.
            The input is not modified.

    Returns:
        Float array with four columns per keypoint: the three direction
        components followed by the length of the offset vector.
        Rows 2, 3 and 5-17 are measured from their parent joint; the other
        rows keep the coordinates they were given. Row 0 is returned
        un-normalized (its length is still reported). A row whose offset has
        zero length keeps a zero direction instead of becoming NaN.
    """
    # Child rows and, position by position, the parent row each is measured from.
    child_rows  = [2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
    parent_rows = [8, 9, 4, 5, 5, 6, 7,  5,  5, 10, 11, 12, 13, 14, 15]

    # Float copy: leaves the caller's array untouched and prevents the
    # normalized values from being truncated when the input has an integer dtype.
    base_keypoints = np.array(keypoints, dtype=float)
    offsets = base_keypoints.copy()
    offsets[child_rows] = base_keypoints[child_rows] - base_keypoints[parent_rows]

    lengths = np.linalg.norm(offsets, axis=1).reshape(-1,1)

    # remove norm from keypoints (row 0 is skipped, as before); rows with zero
    # length are left as the zero vector they already are rather than dividing 0/0
    directions = offsets.copy()
    np.divide(offsets[1:], lengths[1:], out=directions[1:], where=lengths[1:] > 0)
    return np.concatenate((directions, lengths), axis=1)

# assume input is 3d keypoints normalized to their parent joint with length included
def normalized_to_keypoints(keypoints):
    """Rebuild keypoint coordinates from parent-relative directions and lengths.

    Args:
        keypoints: 2-D array whose last column is the length of each offset
            and whose remaining columns are the direction components. Rows
            up to index 17 are accessed. The input is not modified.

    Returns:
        Array with one row per keypoint (the length column removed). Rows
        2, 3 and 5-17 are the parent's rebuilt position plus the scaled
        direction; the other rows are just direction * length.
    """
    keypoints = np.asarray(keypoints)
    lengths = keypoints[:,-1].reshape(-1,1)
    keypoints = keypoints[:,:-1] * lengths  # new array holding the un-normalized offsets

    # (child, parent) pairs, listed so that every parent is rebuilt before its
    # children. The hips hang off the neck (row 5): adding them to the
    # shoulders instead would misplace both legs.
    chain = [
        (5, 4),              # neck from head
        (6, 5), (7, 5),      # shoulders from neck
        (8, 6), (9, 7),      # elbows from shoulders
        (2, 8), (3, 9),      # controllers from elbows
        (10, 5), (11, 5),    # hips from neck
        (12, 10), (13, 11),  # knees from hips
        (14, 12), (15, 13),  # feet from knees
        (16, 14), (17, 15),  # toes from feet
    ]
    for child, parent in chain:
        keypoints[child] = keypoints[child] + keypoints[parent]

    return keypoints


def process_keypoints(keypoints, camera_position, camera_direction, img_size=IMG_SIZE):
    """Filter one frame's keypoints and convert them to the normalized, camera-facing form.

    Args:
        keypoints: list of per-keypoint records; each must provide the
            fields 'state', 'x', 'y', 'vx', 'vy' and 'vz'.
        camera_position: array of 3 values; keypoint 0's (vx, vy, vz) is
            subtracted from it.
        camera_direction: kept for interface compatibility; not used here.
        img_size: image side length in pixels for the in-frame check.

    Returns:
        ``(output_keypoints, relative_camera_position)``, or ``(None, None)``
        when the frame is rejected. ``output_keypoints`` has one row per
        keypoint: the four columns produced by ``keypoints_to_normalized``,
        then screen x, screen y and the 'state' value.
    """
    keypoints = pd.json_normalize(keypoints)

    visibility_state = np.array(keypoints['state'].values)

    # Reject frames whose summed 'state' values fall below 13.
    if visibility_state.sum() < 13:
        return None, None

    screenspace_coords = keypoints[['x','y']].values

    # Require more than 90% of the keypoints to lie at least `margin` pixels
    # inside the image. A keypoint counts only when BOTH x and y are inside;
    # counting x and y separately and dividing by the number of keypoints gave
    # a ratio between 0 and 2, which made the 0.9 threshold far too lenient.
    margin = 6
    inside = np.logical_and(screenspace_coords > margin, screenspace_coords < (img_size - margin))
    in_frame_fraction = inside.all(axis=1).mean()
    if not (in_frame_fraction > 0.9):
        return None, None

    global_coords = keypoints[['vx','vy','vz']].values

    # Move the origin to keypoint 0 (the headset).
    headset_relative_coords = global_coords - global_coords[0]
    relative_camera_position = camera_position - global_coords[0]

    # Compare headset-forward (keypoint 1) with the direction to the camera,
    # both flattened onto the x/z plane.
    xz_headset_fwd = np.array([headset_relative_coords[1][0], 0.0, headset_relative_coords[1][2]])
    xz_headset_to_camera = np.array([relative_camera_position[0], 0.0, relative_camera_position[2]])
    alignment_rotation = angle_between(xz_headset_fwd, xz_headset_to_camera)

    camera_facing_coords = y_rotation(headset_relative_coords, alignment_rotation)

    # angle_between is unsigned, so the rotation sign is unknown: if +angle did
    # not line the two directions up, rotate the other way instead.
    xz_headset_fwd = np.array([camera_facing_coords[1][0], 0.0, camera_facing_coords[1][2]])
    new_angle = angle_between(xz_headset_fwd, xz_headset_to_camera)
    if new_angle > 0.001:
        camera_facing_coords = y_rotation(headset_relative_coords, -alignment_rotation)

    # Normalize the keypoints
    normalized_keypoints = keypoints_to_normalized(camera_facing_coords)

    output_keypoints = np.concatenate((normalized_keypoints, screenspace_coords, visibility_state.reshape(-1,1)), axis=1)
    return output_keypoints, relative_camera_position


def organize_and_process_captures(captures):
    """Load capture files and build one DataFrame of usable frames.

    For every capture file the per-frame records are flattened, the keypoint
    annotation (``annotations[2]``) is run through ``process_keypoints`` and
    the segmentation file name (``annotations[1]['filename']``) is recorded.
    File names are prefixed with the folder of the globally selected dataset
    (``all_datasets`` / ``selected_dataset``).

    Args:
        captures: list of capture JSON file paths.

    Returns:
        DataFrame with columns row_id, seq_id, step, timestamp, rgb_filename,
        keypoints, camera_position, camera_direction and seg_filename. Rows
        containing any missing value (for example frames rejected by
        ``process_keypoints``) are dropped and the index is renumbered from 0.

    Raises:
        ValueError: if ``captures`` is empty.
    """
    dataset_root = os.path.join(all_datasets, selected_dataset)

    frames = []
    for capture in captures:
        df = pd.json_normalize(pd.read_json(capture).values[:,1].tolist())[['id','sequence_id','step',
                                                        'timestamp','filename','annotations']]
        df.columns = ['row_id', 'seq_id','step','timestamp','rgb_filename','annotations']
        keypoints = []
        camera_positions = []
        camera_directions = []
        segmented_images = []
        for annotation in df['annotations'].values:
            annotation_values = annotation[2]['values']
            if annotation_values:
                annotation_values = annotation_values[0]

                camera_position = np.array(list(annotation_values['camera_position'].values()))
                camera_direction = np.array(list(annotation_values['camera_forward'].values()))

                # camera_position comes back relative to the headset, or None
                # together with the keypoints when the frame is rejected
                normalized_keypoints, camera_position = process_keypoints(annotation_values['keypoints'], camera_position, camera_direction)

                keypoints.append(normalized_keypoints)
                camera_positions.append(camera_position)
                camera_directions.append(camera_direction)
                segmented_images.append(annotation[1]['filename'])
            else:
                # no keypoint annotation for this frame; the None values get
                # the row dropped below
                keypoints.append(None)
                camera_positions.append(None)
                camera_directions.append(None)
                segmented_images.append("")

        df['keypoints'] = keypoints
        df['camera_position'] = camera_positions
        df['camera_direction'] = camera_directions
        df['seg_filename'] = [os.path.join(dataset_root, x) for x in segmented_images]
        df['rgb_filename'] = [os.path.join(dataset_root, x) for x in df['rgb_filename']]

        frames.append(df)

    if not frames:
        raise ValueError("no capture files to process")

    # ignore_index / reset_index: every capture file starts its own index at
    # 0, so without renumbering the combined frame has duplicate labels.
    combined = pd.concat(frames, ignore_index=True)
    return (
        combined
        .drop(columns='annotations')
        .dropna(axis=0)
        .reset_index(drop=True)
    )

In [None]:
# locate the capture files of the selected dataset and turn them into a dataframe
_dataset_dir = os.path.join(all_datasets, selected_dataset)
data_captures = process_dataset(_dataset_dir) # , rgb_images, segmented_images
df = organize_and_process_captures(data_captures)

# use numpy to save the dataframe's values next to the dataset
_processed_path = os.path.join(_dataset_dir, 'processed.npy')
np.save(_processed_path, df.to_numpy())

# display the dataframe
df.head()

In [None]:
# To reload the saved array into a DataFrame later, uncomment the lines below:
# df = pd.DataFrame(np.load(os.path.join(all_datasets, selected_dataset, 'processed.npy'), allow_pickle=True), 
#                       columns=['row_id', 'seq_id','step','timestamp','rgb_filename','keypoints','camera_position','camera_direction','seg_filename'])

In [1]:
# use plotly to create a function to print 3D keypoints to an interactive 3D plot
def plot_keypoints(keypoints, camera_pos=None):
    """Show keypoints (and optionally the camera) in an interactive 3D scatter plot.

    Args:
        keypoints: 2-D array with one row per keypoint; columns 0, 1 and 2
            are plotted as x, y and z. Rows are labelled with the first
            ``len(keypoints)`` entries of ``keypoint_labels``.
        camera_pos: optional position with the same number of columns as
            ``keypoints``; when given it is plotted as an extra point
            labelled 'camera'.
    """
    points = keypoints
    labels = keypoint_labels[:len(keypoints)]
    if camera_pos is not None:
        points = np.concatenate((keypoints, np.array([camera_pos])), axis=0)
        labels = labels + ['camera']

    fig = px.scatter_3d(x=points[:,0], y=points[:,1], z=points[:,2], text=labels)

    # set scale to be equal: 'data' draws the axes in proportion to their
    # ranges, whereas 'cube' stretches every axis to the same on-screen length
    # and distorts the skeleton
    fig.update_scenes(aspectmode='data')

    # set plotly theme to dark
    fig.update_layout(template='plotly_dark')
    fig.show()

In [None]:
# Plot the first processed frame: take its first four columns, rebuild the
# keypoint positions from them and show them together with the camera.
_first_keypoints = df['keypoints'].values[0]
_first_camera = df['camera_position'].values[0]
plot_keypoints(normalized_to_keypoints(_first_keypoints[:,:4]), _first_camera)