In [1]:
# import codecs as cs
import pandas as pd
import numpy as np
from tqdm import tqdm
from os.path import join as pjoin
import math
import torch
#from utils.rotation_conversions import *
import copy
#from utils.face_z_align_util import joint_idx, face_z_transform
from smplx import SMPLX
from utils.geometry import *
import json
import os

In [2]:
def rotate_motion(root_global_orient):
    trans_matrix = np.array([[0.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 0.0, 0.0]])

    motion = np.dot(root_global_orient, trans_matrix)  # exchange the y and z axis

    return motion

def compute_canonical_transform(global_orient):
    rotation_matrix = torch.tensor([
        [1, 0, 0],
        [0, 0, 1],
        [0, -1, 0]
    ], dtype=global_orient.dtype)
    global_orient_matrix = axis_angle_to_matrix(global_orient)
    global_orient_matrix = torch.matmul(rotation_matrix, global_orient_matrix)
    global_orient = matrix_to_axis_angle(global_orient_matrix)
    return global_orient

def transform_translation(trans):
    trans_matrix = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0]])
    trans = np.dot(trans, trans_matrix)  # exchange the y and z axis
    trans[:, 2] = trans[:, 2] * (-1)
    return trans


In [3]:
def get_smplx_322(data, ex_fps):
    fps = 0


    if 'mocap_frame_rate' in data:
        fps = data['mocap_frame_rate']
        print(fps)
        down_sample = int(fps / ex_fps)
        
    elif 'mocap_framerate' in data:
        fps = data['mocap_framerate']
        print(fps)
        down_sample = int(fps / ex_fps)
    else:
        # down_sample = 1
        return None

    frame_number = data['trans'].shape[0]
    


    fId = 0 # frame id of the mocap sequence
    pose_seq = []

    for fId in range(0, frame_number, down_sample):
        pose_root = data['global_ori'][fId:fId+1]
        pose_body = data['poses'][fId:fId+1,1:22,:].reshape(1, 63)
        pose_hand = data['poses'][fId:fId+1,25:, :].reshape(1, 90)
        pose_jaw = data['poses'][fId:fId+1,23:24,:].reshape(1, 3)
        pose_expression = np.zeros((1, 50))
        pose_face_shape = np.zeros((1, 100))
        pose_trans = data['trans'][fId:fId+1]
        pose_body_shape = data['betas'][:10][None,:]
        #print shapes
        print(pose_root.shape, pose_body.shape, pose_hand.shape, pose_jaw.shape, pose_expression.shape, pose_face_shape.shape, pose_trans.shape, pose_body_shape.shape)
        pose = np.concatenate((pose_root, pose_body, pose_hand, pose_jaw, pose_expression, pose_face_shape, pose_trans, pose_body_shape), axis=1)
        print(pose.shape)
        pose_seq.append(pose)

    pose_seq = np.concatenate(pose_seq, axis=0)
    

    return pose_seq


def process_pose(pose):
    pose_root = pose[:, :3]
    pose_root = compute_canonical_transform(torch.from_numpy(pose_root)).detach().cpu().numpy()
    pose[:, :3] = pose_root
    pose_trans = pose[:, 309:312]
    pose_trans = transform_translation(pose_trans)
    pose[:, 309:312] = pose_trans

    return pose



In [28]:
# dict_keys(['root_pose', 'body_pose', 'jaw_pose', 'leye_pose', 'reye_pose', 'lhand_pose', 'rhand_pose', 'expr', 'trans'])
# root_pose (3,)
# body_pose (21, 3)
# jaw_pose (3,)
# leye_pose (3,)
# reye_pose (3,)
# lhand_pose (15, 3)
# rhand_pose (15, 3)
# expr (50,)
# trans (3,) 

def get_smplx_322_optimised(data, ex_fps):
    fps = 0


    if 'mocap_frame_rate' in data:
        fps = data['mocap_frame_rate']
        print(fps)
        down_sample = int(fps / ex_fps)
        
    elif 'mocap_framerate' in data:
        fps = data['mocap_framerate']
        print(fps)
        down_sample = int(fps / ex_fps)
    else:
        # down_sample = 1
        fps = 25
        down_sample = int(fps / ex_fps)

    frame_number = len(data)
    print(frame_number)
    
    print(data.shape)

    fId = 0 # frame id of the mocap sequence
    pose_seq = []

    for fId in range(0, frame_number, down_sample):
        data_pose = data[fId]
        pose_root = data_pose['root_pose'].reshape(1, 3)
        pose_body = data_pose['body_pose'].reshape(1, 63)
        pose_hand = data_pose['lhand_pose'].reshape(1, 45)
        pose_hand = np.concatenate((pose_hand, data_pose['rhand_pose'].reshape(1, 45)), axis=1)
        pose_jaw = data_pose['jaw_pose'].reshape(1, 3)
        pose_expression = data_pose['expr'].reshape(1, 50)
        pose_face_shape = np.zeros((1, 100))
        pose_trans = data_pose['trans'].reshape(1, 3)
        pose_body_shape = np.asarray([[0.421,-1.658,0.361,0.314,0.226,0.065,0.175,-0.150,-0.097,-0.191]])
        #print shapes
        print(pose_root.shape, pose_body.shape, pose_hand.shape, pose_jaw.shape, pose_expression.shape, pose_face_shape.shape, pose_trans.shape, pose_body_shape.shape)
        pose = np.concatenate((pose_root, pose_body, pose_hand, pose_jaw, pose_expression, pose_face_shape, pose_trans, pose_body_shape), axis=1)
        print(pose.shape)
        pose_seq.append(pose)

    pose_seq = np.concatenate(pose_seq, axis=0)
    

    return pose_seq


def process_pose_optimised(pose):
    pose_root = pose[:, :3]
    pose_root = compute_canonical_transform(torch.from_numpy(pose_root)).detach().cpu().numpy()
    pose[:, :3] = pose_root
    pose_trans = pose[:, 309:312]
    pose_trans = transform_translation(pose_trans)
    pose[:, 309:312] = pose_trans

    return pose



In [26]:
path = "0.npz"
data = np.load(path, allow_pickle=True)
data = dict(data)
print(data.keys())
for key in data.keys():
    print(key, data[key].shape)
ex_fps = 30
pose_seq = get_smplx_322(data, ex_fps)
pose_seq = process_pose(pose_seq)

np.save("0_smplx_322.npy",pose_seq)

dict_keys(['motion_info', 'betas', 'poses', 'global_ori', 'trans', 'mocap_frame_rate', 'gender', 'expression'])
motion_info (0,)
betas (10,)
poses (528, 55, 3)
global_ori (528, 3)
trans (528, 3)
mocap_frame_rate ()
gender ()
expression (528, 100)
30
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1

In [30]:
path = "/scratch/aparna/bsldict_test_preprocessed/nearly/n_001_049_000_nearly/smplx_optimized/smplx_params"
jsons = os.listdir(path)
all_poses = []
for json_file in jsons:
    data= json.load(open(pjoin(path, json_file)))
    data = dict(data)
    # print(data.keys())
    for key in data.keys():
        data[key] = np.array(data[key])

    all_poses.append(data)
all_poses = np.array(all_poses)
ex_fps = 25
pose_seq = get_smplx_322_optimised(all_poses, ex_fps)
print(pose_seq.shape)

pose_seq = process_pose_optimised(pose_seq)
np.save(pjoin(path, json_file[:-4]+"_smplx_322.npy"),pose_seq)

38
(38,)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 322)
(1, 3) (1, 63) (1, 90) (1, 3) (1, 50) (1, 100) (1, 3) (1, 10)
(1, 32

In [10]:
path = "../src/tomato_represenation/motion_data/smplx_322/18th_smplx_322.npy"
data = np.load(path)
print(data.shape)


path = "0_smplx_322.npy"
data = np.load(path)
print(data.shape)





(38, 322)
(528, 322)


In [9]:
# Load text and motion data
import torch
from transformers import AutoTokenizer, AutoModel
from tma.models.architectures.temos.textencoder.distillbert_actor import DistilbertActorAgnosticEncoder
from tma.models.architectures.temos.motionencoder.actor import ActorAgnosticEncoder
from collections import OrderedDict

modelpath = 'distilbert-base-uncased'

#textencoder = DistilbertActorAgnosticEncoder(modelpath, num_layers=4)
motionencoder = ActorAgnosticEncoder(nfeats=623, vae = True, num_layers=4)

"""
load model here
You need to normalize the motion data with mean and std.
For motionx, they are stored in './deps/t2m/motionx/vector_623/Comp_v6_KLD01/meta/*.npy'
"""

motion = "/ssd_scratch/cvit/aparna/HumanTOMATO/src/tomato_represenation/motion_data/new_joint_vecs/0_smplx_322.npy"
data = np.load(motion)
print(data.shape)

motion = np.load(motion)
motion = torch.tensor(motion, dtype=torch.float32).reshape(1, motion.shape[0], motion.shape[1])
lengths = [motion.shape[1]]
#print(textencoder(["a man is running"]).loc)
print(motion.shape)
print(motionencoder(motion, lengths).loc)

(415, 623)
torch.Size([1, 415, 623])
tensor([[ 1.4078, -0.4695, -1.2225, -0.7949, -0.8898, -0.2829,  1.8933, -0.1200,
         -1.4745, -0.5572, -0.8818,  0.7569,  0.3521,  0.6904,  0.1940,  0.3485,
         -0.8967, -0.7163,  0.6620,  0.8580, -0.5216,  0.7121,  0.2084,  0.9818,
         -0.8040,  0.8997, -0.2876, -1.3231, -0.3650,  0.5897, -2.3612,  1.0297,
         -0.7807, -0.0582, -1.2213, -0.1504,  2.0377,  0.8306, -1.5268, -1.0279,
         -0.3199, -1.2667,  0.0196,  1.7906,  1.9777,  0.8888, -2.8474,  0.3219,
          1.2725,  2.3794,  0.3434,  1.1487, -0.5008,  0.4776, -0.5489, -0.2131,
          0.5023, -0.0802, -0.8274, -0.2237, -0.4762,  0.3259,  1.1169,  1.3693,
         -1.8126,  0.4727, -0.8788,  0.7635, -1.2268, -0.2989,  1.2497, -0.3367,
         -0.6765, -2.1242, -0.5260,  1.0518,  0.6298,  1.1701, -0.9792,  0.7567,
          0.9175, -0.5187,  0.0863, -0.5688,  1.1248,  0.6745, -0.1620, -1.0169,
         -0.6415,  2.6633,  0.2530,  0.1762,  0.4190, -0.7152,  0.8950, 