# Imports

In [2]:
import numpy as np
import pickle

# Load Data

In [3]:
# NTU RGB+D 120 Skeleton
with open('NTU/X.pkl', 'rb') as f:
    X = pickle.load(f)

# Process Data

## Remove empty files

In [4]:
bad_files = []
for file in X:
    if type(X[file]) == list: 
        bad_files.append(file)
for file in bad_files:
    del X[file]

print(f"Number of files: {len(X.keys())}")

Number of files: 85666


In [5]:
for key in X:
    print(X[key].shape)
    break

(103, 75)


## Define NTU Skeleton

In [6]:
kinematic_chains = [
    [0,1,20,2,3],
    [0,1,20,8,9,10,11,23],
    [0,1,20,8,9,10,11,24],
    [0,1,20,4,5,6,7,21],
    [0,1,20,4,5,6,7,22],
    [0,12,13,14,15],
    [0,16,17,18,19]
]

end_effectors = [x[-1] for x in kinematic_chains]
root_joint = 0

# Build Model

In [7]:
import torch
import torch.nn as nn

## Static Dynamic Extractors

In [43]:
# def extract_skeleton(frames, joints = 25):
#     static = np.zeros(joints) # (joints)
#     dynamic = np.zeros((len(frames), joints, 3)) # (frames, joints, 3)
    
#     # Static Skeleton = Offset from parent joint in form of l2 norm
#     for path in kinematic_chains:
#         for j in range(len(path)):
#             if j == 0: continue # root offset is always 0
#             if static[path[j]] != 0: continue # already computed
            
#             # calculate l2 norm (offset) from j to j-1
#             static[path[j]] = np.linalg.norm(frames[0][path[j]*3:path[j]*3+3]-frames[0][path[j-1]*3:path[j-1]*3+3])

#     # Dynamic Skeleton = X,Y,Z rotations from parent joint, for each frame
#     for f in range(len(frames)):
#         for path in kinematic_chains:
#             for j in range(len(path)):
#                 if j == 0: continue # root offset is always 0,0,0
#                 if not np.all(dynamic[f][path[j]]==0): continue # already computed

#                 # calculate rotation from j to j-1
#                 vec = frames[f][path[j]*3:path[j]*3+3]-frames[f][path[j-1]*3:path[j-1]*3+3]
#                 length = np.linalg.norm(vec)
#                 norm_vec = vec / length
#                 x_rot = np.arctan2(norm_vec[1], norm_vec[2])
#                 y_rot = np.arctan2(norm_vec[0], norm_vec[2])
#                 z_rot = np.arctan2(norm_vec[0], norm_vec[1])
#                 dynamic[f][path[j]] = np.array([x_rot, y_rot, z_rot])
    
#     return static, dynamic

# Modified to use matrix operations to decrease computational complexity
def extract_skeleton(frames, joints=25):
    n_frames = len(frames)
    frames = np.reshape(frames, (n_frames, joints, 3))

    static = np.zeros(joints) # (joints)
    dynamic = np.zeros((n_frames, joints, 3)) # (frames, joints, 3)
    
    # Static Skeleton = Average offset from parent joint in form of l2 norm
    for path in kinematic_chains:
        path = np.array(path)
        parent_joints = frames[:, path[1:], :]
        child_joints = frames[:, path[:-1], :]
        norms = np.linalg.norm(parent_joints - child_joints, axis=-1) 
        static[path[1:]] = np.mean(norms, axis=0)


    # Dynamic Skeleton = X,Y,Z rotations from parent joint, for each frame
    for path in kinematic_chains:
        path = np.array(path)
        parent_joints = frames[:, path[1:], :]
        child_joints = frames[:, path[:-1], :]
        vec = parent_joints - child_joints
        length = np.linalg.norm(vec, axis=-1, keepdims=True)
        norm_vec = vec / length

        dynamic[:, path[1:], 0] = np.arctan2(norm_vec[..., 1], norm_vec[..., 2])
        dynamic[:, path[1:], 1] = np.arctan2(norm_vec[..., 0], norm_vec[..., 2])
        dynamic[:, path[1:], 2] = np.arctan2(norm_vec[..., 0], norm_vec[..., 1])
    
    return static, dynamic

In [50]:
# For testing
for key in X:
    static, dynamic = extract_skeleton(X[key])
    break

## Skeleton Pool/Unpool

In [85]:
class SkeletonPool(nn.Module):
    def __init__(self):
        super(SkeletonPool, self).__init__()
        self.conv1 = nn.Conv1d(1, 1, 2, stride=2)
        self.pool1 = nn.AvgPool1d(2, stride=2)
        self.conv2 = nn.Conv1d(1, 1, 2, stride=2)
        self.pool2 = nn.AvgPool1d(2, stride=2)

    def forward(self, S, Q = None): # X = (batch, S or Q+S) where S = (joints, 1) and Q = (frames, joints, 3). If Q+S, tile S to (frames, joints, 1)
        S = torch.tensor(S)
        if Q is None:
            x = S.unsqueeze(-1)
        else:
            Q = torch.tensor(Q)
            x = torch.cat((Q, S.repeat(Q.shape[0], 1).unsqueeze(2)), dim=-1)
        print(x.shape)
        return self.pool2(self.conv2(self.pool1(self.conv1(x))))

In [86]:
pool = SkeletonPool()
pool(static)#, dynamic)

torch.Size([25, 1])


RuntimeError: Given groups=1, weight of size [1, 1, 2], expected input[1, 25, 1] to have 1 channels, but got 25 channels instead

## Static Encoder

In [None]:
class StaticEncoder(nn.Module):
    def __init__(self, num_layers):
        super(StaticEncoder, self).__init__()
        self.layers = nn.ModuleList()
        self.num_layers = num_layers
        activation = nn.LeakyReLU(negative_slope=0.2)
        channels = 3

    def forward(self, S):
        # Call skeleton pool with S

## Dynamic Encoder

In [None]:
# forward call takes S + Q as input
# call skeleton pool with S + Q

## Encoder

In [None]:
class 

## Decoder