In [15]:

import sys, os

import importlib, time
import traceback
import numpy as np
import time

import torch
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.data import DataLoader

from humor.utils.config_new import ConfigParser
from humor.utils.logging import Logger, class_name_to_file_name, mkdir, cp_files
from humor.utils.torch import get_device, save_state, load_state
from humor.utils.stats import StatTracker
from humor.models.model_utils import step

NUM_WORKERS = 4
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
# Join the path from its components so the separator matches the host OS;
# on Windows this yields exactly the original backslash-separated string.
config_path = os.path.join("configs", "train_motion_vae.yaml")
config_parser_yaml = ConfigParser(config_path)
# parse('train') returns a pair; only its first element is used in this notebook.
args_obj, _ = config_parser_yaml.parse('train')
# See config
dict_attr = ['base_dict', 'model_dict', 'dataset_dict', 'loss_dict']
for attr in dict_attr:
    print(f"{attr}: {getattr(args_obj, attr)}")
# Shortcut to the base options (e.g. batch_size, gpu) that later cells read.
args = args_obj.base

Using default: {'eps', 'ckpt', 'use_adam', 'load_optim', 'beta1', 'beta2', 'decay'}
Using default: {'model_use_smpl_joint_inputs', 'detach_sched_samp', 'output_delta'}
Using default: {'frames_out_step_size', 'splits_path', 'data_noise_std'}
Using default: {'kl_loss_cycle_len', 'smpl_vert_consistency_loss'}
base_dict: {'dataset': 'AmassDiscreteDataset', 'model': 'MotionVAE', 'loss': 'HumorLoss', 'out': './out/motion_vae', 'ckpt': None, 'gpu': 0, 'batch_size': 200, 'print_every': 10, 'epochs': 200, 'val_every': 2, 'save_every': 25, 'lr': 0.001, 'beta1': 0.9, 'beta2': 0.999, 'eps': 1e-08, 'sched_milestones': [50, 80, 140], 'sched_decay': 0.5, 'decay': 0.0, 'load_optim': True, 'use_adam': False, 'sched_samp_start': 150, 'sched_samp_end': 160}
model_dict: {'out_rot_rep': 'aa', 'in_rot_rep': 'mat', 'latent_size': 128, 'steps_in': 1, 'output_delta': True, 'model_data_config': 'smpl+joints+contacts', 'detach_sched_samp': True, 'model_use_smpl_joint_inputs': False}
dataset_dict: {'data_paths': 

In [23]:
from humor.models.humor_diffusion_transformer import HumorDiffusionTransformer
print(f"Model: {args_obj.model_dict}")

# Checkpoint and config paths are joined from components so they resolve on any
# OS; on Windows the resulting strings equal the original backslash literals.
# NOTE(review): this cell points at best_model.pth while the cell that loads the
# standalone VAE uses best_train_model.pth from the same run - confirm intended.
model = HumorDiffusionTransformer(
    **args_obj.model_dict,
    vae_ckpt=os.path.join('out', 'motion_vae', '20250506_014121', 'checkpoints', 'best_model.pth'),
    vae_cfg=os.path.join('configs', 'train_motion_vae.yaml'),
    model_smpl_batch_size=args.batch_size,
)

Model: {'out_rot_rep': 'aa', 'in_rot_rep': 'mat', 'latent_size': 128, 'steps_in': 1, 'output_delta': True, 'model_data_config': 'smpl+joints+contacts', 'detach_sched_samp': True, 'model_use_smpl_joint_inputs': False, 'pose_token_dim': 256, 'diffusion_base_dim': 256, 'nhead': 4, 'num_layers': 6, 'dim_feedforward': 1024, 'dropout': 0.1, 'cfg_scale': 4.0}
Using default: {'ckpt', 'decay', 'eps', 'use_adam', 'load_optim', 'beta1', 'beta2'}
Using default: {'detach_sched_samp', 'output_delta', 'model_use_smpl_joint_inputs'}
Using default: {'data_noise_std', 'frames_out_step_size', 'splits_path'}
Using default: {'smpl_vert_consistency_loss', 'kl_loss_cycle_len'}


  full_checkpoint_dict = torch.load(load_path, map_location=map_location)


In [11]:
from humor.models.motion_v_a_e import MotionVAE

# Build the VAE from the parsed model options; the SMPL batch size follows
# the configured training batch size.
vae = MotionVAE(
    **args_obj.model_dict,
    model_smpl_batch_size=args.batch_size,
)

In [12]:
# Obtain the compute device for the GPU index given in the base config (args.gpu);
# every later `.to(device)` call in this notebook uses this value.
device = get_device(args.gpu)

Using detected GPU...


In [13]:
vae.to(device)
# Joined from components so the path resolves on any OS; on Windows the result
# equals the original backslash-separated literal.
vae_ckpt = os.path.join('out', 'motion_vae', '20250506_014121', 'checkpoints', 'best_train_model.pth')
# Kept as the last expression so its return value is shown as the cell output.
load_state(vae_ckpt, vae, map_location=device)

  full_checkpoint_dict = torch.load(load_path, map_location=map_location)


(131, 0.009386874148801256, 0.008887221386369066)

In [14]:
  # load loss class and instantiate
from humor.losses.humor_loss import HumorLoss

# smpl_batch_size = batch_size * sample_num_frames
# (presumably one SMPL slot per frame in a batch - confirm against HumorLoss).
loss_func = HumorLoss(
    **args_obj.loss_dict,
    smpl_batch_size=args.batch_size * args_obj.dataset.sample_num_frames,  # assumes loss is HumorLoss
)
loss_func.to(device)

# Freeze whatever parameters the loss module holds.
for param in loss_func.parameters():
    param.requires_grad = False

In [17]:
from humor.datasets.amass_discrete_dataset import AmassDiscreteDataset

# Test split, configured with the dataset options parsed from the YAML file.
test_dataset = AmassDiscreteDataset(split='test', **args_obj.dataset_dict)

# Loader over the test split: fixed order, small batches.
test_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

Loading data from../datasets/AMASS/amass_processed
Logger must be initialized before logging!
This split contains 423 sequences (that meet the duration criteria).
Logger must be initialized before logging!
The dataset contains 4136 sub-sequences in total.
Logger must be initialized before logging!


In [None]:
# Walk the whole test loader and report every `skip_chunk`-th batch index,
# together with a running count of how many batches have been reported so far.
skip_chunk = 172
ctn = 0
for i, data in enumerate(test_loader):
    if i % skip_chunk == 0:
        print(f"i: {i}")
        print(f"ctn: {ctn}")
        ctn += 1

In [8]:
# Number of batches the test DataLoader yields in one pass.
print(len(test_loader))

12


In [16]:
# Collect the per-batch loss of the VAE over the test loader.
losses = []
vae.to(device)
vae.eval()
# No backward pass follows, so run without autograd bookkeeping to avoid
# holding activation graphs in memory for every batch.
with torch.no_grad():
    for i, data in enumerate(test_loader):
        try:
            # run model
            # NOTE(review): mode='train' is passed although the model is in eval
            # mode - confirm this is the intended setting for `step`.
            loss, stats_dict = step(vae, loss_func, data, test_dataset, device, cur_epoch=0, mode='train', use_gt_p=1.0)
            if torch.isnan(loss).item():
                Logger.log('WARNING: NaN loss. Skipping to next data...')
                torch.cuda.empty_cache()
                continue

        except (RuntimeError, AssertionError) as e:
            Logger.log(f'WARNING: {e}')
            # Bare raise re-raises the active exception with its original traceback.
            raise

        losses.append(loss.item())


In [27]:
model.eval()
model.to(device)
# Random stand-in batches created directly on the target device (no host copy).
# The last dimension, 339, equals the sum of model.input_dim_list printed in a later cell.
x_input = torch.randn(2000, 1, 339, device=device)
x_prev = torch.randn(2000, 1, 339, device=device)

# Smoke-test forward pass only; skip building the autograd graph.
with torch.no_grad():
    out = model(x_prev, x_input)
print(out.keys())  # names of the entries in the returned dict

dict_keys(['pred_noise', 'z', 'z_noise'])


In [17]:
# Mean of the per-batch loss values gathered in `losses` by the evaluation loop.
print(f"loss: {np.mean(losses)}")

loss: 0.013733987061035199


In [29]:
# Per-part input widths reported by the model; their sum is the flattened input size.
input_list = model.input_dim_list
# Label matches the variable being shown (it previously read "output_list").
print(f"input_list: {input_list}")
print(sum(input_list))

output_list: [3, 3, 9, 3, 189, 66, 66]
339


In [30]:
from humor.models.diffusion_transformer import DiffusionTransformer

# Build a DiffusionTransformer directly from two literal sizes plus the per-part
# input widths taken from `model.input_dim_list`.
# NOTE(review): 128 and 256 look like the latent size and the token/base width
# from the model config - confirm against the constructor signature.
diff_model = DiffusionTransformer(128, 256, input_list)
# Last expression of the cell: moves the module to `device` and displays its repr.
diff_model.to(device)

DiffusionTransformer(
  (pose_tokenizer): PoseTokenizer(
    (part_proj): ModuleList(
      (0-1): 2 x Linear(in_features=3, out_features=256, bias=True)
      (2): Linear(in_features=9, out_features=256, bias=True)
      (3): Linear(in_features=3, out_features=256, bias=True)
      (4): Linear(in_features=189, out_features=256, bias=True)
      (5-6): 2 x Linear(in_features=66, out_features=256, bias=True)
    )
  )
  (latent_proj): Linear(in_features=128, out_features=256, bias=True)
  (pose_proj): Linear(in_features=256, out_features=256, bias=True)
  (time_embed): Sequential(
    (0): Linear(in_features=256, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=256, bias=True)
  )
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0-5): 6 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
        )
        (li

In [31]:
# Random stand-in inputs created directly on the target device (no host copy).
# 339 equals sum(input_list); 128 is the first size passed to DiffusionTransformer.
x_input = torch.randn(2000, 339, device=device)
z_input = torch.randn(2000, 128, device=device)
# One integer step index per sample, drawn from [0, 1000).
t = torch.randint(0, 1000, (2000,), device=device)
# Shape check only; skip building the autograd graph.
with torch.no_grad():
    output = diff_model(z_input, x_input, t)
print(f"output: {output.shape}")


output: torch.Size([2000, 128])


In [None]:
from humor.fitting.config import parse_args

# Both paths are joined from components so they resolve on any OS; on Windows
# the results equal the original backslash-separated literals.
original_cfg_path = os.path.join('configs', 'fit_amass_keypts.cfg')
yaml_cfg_path = os.path.join('configs', 'fit_amass_keypts.yaml')