In [1]:
from pathlib import Path

from lgtm.model.motion_diffusion import MotionDiffusion as LGTM
from lgtm.dataset.HumanML3D import HumanML3D, recover_rearranged_humanml3d_features
from lgtm.utils.body_part_annotation_augmentation import BodyPartAnnotationTool

In [2]:
from lgtm.dataset.HumanML3D import BodyPart_HumanML3D

# Tool that decomposes a full-body motion description into part-level descriptions.
# It talks to the OpenAI API, so you need an API key from OpenAI. The key is read from
# the OPENAI_API_KEY environment variable so that it is never saved inside the notebook;
# when the variable is unset it falls back to "" (the previous hard-coded value).
import os

tool = BodyPartAnnotationTool(
    api_key=os.environ.get("OPENAI_API_KEY", ""),
    base_url="https://api.openai.com/v1",
)

# Restore the pretrained LGTM model from its checkpoint, then freeze it
# (this notebook only samples from the model).
checkpoint_path = Path("./checkpoints/lgtm/checkpoints/epoch=196-val_loss=0.3769.ckpt")
model = LGTM.load_from_checkpoint(checkpoint_path)
model.freeze()

# Part-level dataset wrapping a HumanML3D instance. The sampling call below does not
# take it as an input; its `humanml3d` member is used afterwards to de-normalize the
# generated motion.
humanml3d_root = Path("third_packages/HumanML3D")
glove_dir = Path("data/glove")
annotations_file = Path("./third_packages/TMR/datasets/annotations/humanml3d/annotations.json")

dataset = BodyPart_HumanML3D(
    HumanML3D(humanml3d_root, glove_dir, "all"),
    annotations_file,
)

/x/haowen_motion/lgtm/.env/lib/python3.10/site-packages/pytorch_lightning/core/saving.py:184: Found keys that are in the model state dict but not in the checkpoint: ['tmr_encoders.whole_body.tmr_model.motion_encoder.tokens', 'tmr_encoders.whole_body.tmr_model.motion_encoder.projection.weight', 'tmr_encoders.whole_body.tmr_model.motion_encoder.projection.bias', 'tmr_encoders.whole_body.tmr_model.motion_encoder.seqTransEncoder.layers.0.self_attn.in_proj_weight', 'tmr_encoders.whole_body.tmr_model.motion_encoder.seqTransEncoder.layers.0.self_attn.in_proj_bias', 'tmr_encoders.whole_body.tmr_model.motion_encoder.seqTransEncoder.layers.0.self_attn.out_proj.weight', 'tmr_encoders.whole_body.tmr_model.motion_encoder.seqTransEncoder.layers.0.self_attn.out_proj.bias', 'tmr_encoders.whole_body.tmr_model.motion_encoder.seqTransEncoder.layers.0.linear1.weight', 'tmr_encoders.whole_body.tmr_model.motion_encoder.seqTransEncoder.layers.0.linear1.bias', 'tmr_encoders.whole_body.tmr_model.motion_encoder

In [3]:
# The input of LGTM
full_body_text = "a man walks forward sits in a chair then with his right hand."
part_level_texts = (await tool.augment("xxx", full_body_text))["xxx"]

In [4]:
# Generate 180 frames of motion for a single prompt. Every text is wrapped in a
# one-element list; feel free to edit any part-level text before sampling.
body_parts = ("head", "torso", "left_arm", "right_arm", "left_leg", "right_leg")
part_texts = {part: [getattr(part_level_texts, part).text] for part in body_parts}

sampled_batch = model.sample(
    whole_texts=[full_body_text],
    part_texts=part_texts,
    lengths=[180],
    num_inference_steps=1000,
)
# Only one prompt was submitted, so keep the first (and only) result.
part_level_normalized_motion = sampled_batch[0]

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [00:51<00:00, 19.44it/s]


In [6]:
# The network outputs a part-level representation; convert it back to the
# HumanML3D feature representation first.
normalized_motion = recover_rearranged_humanml3d_features(part_level_normalized_motion)

# Convert to a NumPy array, then de-normalize with the HumanML3D dataset instance.
normalized_motion_np = normalized_motion.cpu().detach().numpy()
motion = dataset.humanml3d.de_normalize(normalized_motion_np)

# Recover the positions from the de-normalized features.
positions = HumanML3D.recover(motion)

In [7]:
# Simple matplotlib-based preview of the generated motion, shown as an HTML5 video.

from lgtm.utils.visualization import animate
from lgtm.utils.transform import exchange_yz
from ipywidgets import HTML

# NOTE(review): the trailing 20 is presumably the frame rate of the animation — confirm.
animation = animate(exchange_yz(positions), HumanML3D.parents, 20)
HTML(animation.to_html5_video())

HTML(value='<video width="640" height="480" controls autoplay loop>\n  <source type="video/mp4" src="data:vide…

In [None]:
# A simpler and faster alternative: convert the positions into a BVH mocap file,
# which can also be viewed interactively for better visualization.

from lgtm.utils.visualization import pos_to_bvh
from third_packages.fmbvh.visualization.utils import show_bvh, bvh_to_video

# Single place to change the name of the exported mocap file.
bvh_file = "output.bvh"

# Recover joint rotations from the positions and save them to a BVH file
# with a predefined T pose.
pos_to_bvh(positions, bvh_file)

# Render the saved BVH file to a video.
# NOTE: make sure the opencv-python package is installed correctly.
bvh_to_video(bvh_file, "output.mp4", "mp4v")

# Show the BVH file interactively.
# NOTE: make sure a valid display is available. The string below lists the key bindings.
"""
    A S D W Z X: camera control
    Q: quit
"""
show_bvh(bvh_file, backend_cv=True)