In [1]:
# Code to run in bash console
# cd exps/baseline_h36m
%load_ext autoreload
%autoreload 2

import argparse
import os, sys
import json
import math
import numpy as np
import copy

from config import config

import model as models
from datasets.h36m import H36MDataset
from utils.logger import get_logger, print_and_log_info
from utils.pyt_utils import link_file, ensure_dir
from datasets.h36m_eval import H36MEval

from custom_test import test

import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# cuBLAS workspace setting for reproducible CUDA results: PyTorch's
# torch.use_deterministic_algorithms(True) (called in the next cell) requires
# CUBLAS_WORKSPACE_CONFIG to be ':4096:8' or ':16:8' on CUDA >= 10.2.
# It is set here, right after the imports and before any CUDA work is issued.
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

2025-04-17 16:54:42.366838: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-17 16:54:42.377272: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744923282.387776   66288 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744923282.391722   66288 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744923282.401606   66288 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
# Experiment options. They are declared like command-line flags but parsed
# further down from a fixed string, so the notebook runs without a real CLI.
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--exp-name', type=str, default=None, help='=exp name')
parser.add_argument('--seed', type=int, default=888, help='=seed')
parser.add_argument('--temporal-only', action='store_true', help='=temporal only')
parser.add_argument('--layer-norm-axis', type=str, default='spatial', help='=layernorm axis')
# default is False for 'store_true'
parser.add_argument('--with-normalization', action='store_true', help='=use layernorm')
parser.add_argument('--spatial-fc', action='store_true', help='=use only spatial fc')
parser.add_argument('--num', type=int, default=64, help='=num of blocks')
parser.add_argument('--weight', type=float, default=1., help='=loss weight')

# pass argument without command line
import shlex
argString = '--seed 888 --exp-name baseline.txt --layer-norm-axis spatial --with-normalization --num 48'
args = parser.parse_args(shlex.split(argString))

# Reproducibility: deterministic kernels plus a fixed torch seed.
torch.use_deterministic_algorithms(True)
# Accuracy log, opened in append mode under the name given by --exp-name.
# NOTE(review): this handle is not closed anywhere in this cell; later cells write to it.
acc_log = open(args.exp_name, 'a')
torch.manual_seed(args.seed)
# TensorBoard writer; no log_dir is passed, so the library default location is used.
writer = SummaryWriter()

# Copy the parsed options into the shared `config` object read by the model code.
config.motion_fc_in.temporal_fc = args.temporal_only
config.motion_fc_out.temporal_fc = args.temporal_only
config.motion_mlp.norm_axis = args.layer_norm_axis
config.motion_mlp.spatial_fc_only = args.spatial_fc
config.motion_mlp.with_normalization = args.with_normalization
config.motion_mlp.num_layers = args.num

# config.motion_rnn.with_normalization = args.with_normalization

# Record the seed at the top of this run's section of the accuracy log.
acc_log.write(''.join('Seed : ' + str(args.seed) + '\n'))

def get_dct_matrix(N):
	"""Build the N x N orthonormal DCT-II matrix and its inverse.

	Entry (k, i) is ``w_k * cos(pi * (i + 1/2) * k / N)`` with
	``w_0 = sqrt(1 / N)`` and ``w_k = sqrt(2 / N)`` for ``k > 0``.

	Args:
		N: number of samples (matrix side length).

	Returns:
		(dct_m, idct_m): two float64 ``numpy`` arrays of shape (N, N);
		``idct_m`` is the numerical inverse of ``dct_m``.
	"""
	dct_m = np.eye(N)
	if N > 0:
		# Column vector of frequencies k and row vector of sample indices i,
		# so the broadcasted expression fills the whole matrix at once.
		k = np.arange(N).reshape(-1, 1)
		i = np.arange(N).reshape(1, -1)
		# Per-row normalisation: only the k == 0 row uses sqrt(1 / N).
		w = np.full((N, 1), np.sqrt(2 / N))
		w[0, 0] = np.sqrt(1 / N)
		dct_m = w * np.cos(np.pi * (i + 1 / 2) * k / N)
	idct_m = np.linalg.inv(dct_m)
	return dct_m, idct_m

# Build the DCT / inverse-DCT matrices once and keep them on the GPU as float32.
# size: (1,T,T) with T = config.motion.h36m_input_length_dct; the leading
# axis of size 1 is added by unsqueeze(0). Requires a CUDA device.
dct_m,idct_m = get_dct_matrix(config.motion.h36m_input_length_dct)
dct_m = torch.tensor(dct_m).float().cuda().unsqueeze(0)
idct_m = torch.tensor(idct_m).float().cuda().unsqueeze(0)

def update_lr_multistep(nb_iter, total_iter, max_lr, mid_lr, min_lr, optimizer, milestones=(10000, 30000)):
	"""Apply a three-level step learning-rate schedule to ``optimizer``.

	Args:
		nb_iter: current iteration index.
		total_iter: unused; kept so existing call sites keep working.
		max_lr: learning rate used while ``nb_iter < milestones[0]``.
		mid_lr: learning rate used while ``milestones[0] <= nb_iter < milestones[1]``.
		min_lr: learning rate used from ``milestones[1]`` onwards.
		optimizer: object exposing ``param_groups`` (a list of dicts with an ``"lr"`` key).
		milestones: pair of iteration thresholds at which the rate drops;
			defaults to the previously hard-coded ``(10000, 30000)``.

	Returns:
		(optimizer, current_lr): the same optimizer object, updated in place,
		and the learning rate that was written into every parameter group.
	"""
	first_drop, second_drop = milestones

	if nb_iter < first_drop:
		current_lr = max_lr
	elif nb_iter < second_drop:
		current_lr = mid_lr
	else:
		current_lr = min_lr

	for param_group in optimizer.param_groups:
		param_group["lr"] = current_lr

	return optimizer, current_lr

def gen_velocity(m):
	"""Return the first-order difference of ``m`` along axis 1.

	The result has one fewer entry along axis 1 than ``m``; element ``t`` is
	``m[:, t + 1] - m[:, t]``.
	"""
	later, earlier = m[:, 1:], m[:, :-1]
	return later - earlier

def train_step(h36m_motion_input, h36m_motion_target, model, optimizer, nb_iter, total_iter, max_lr, mid_lr, min_lr) :
	"""Run one optimisation step on a single batch and log it.

	Reads the module-level ``config``, ``dct_m``, ``idct_m`` and ``writer``.

	Args:
		h36m_motion_input: input batch; moved to the GPU inside this function.
		h36m_motion_target: target batch, unpacked as (b, n, c) and reshaped
			to (b, n, 22, 3) — presumably 22 joints x 3 coordinates.
		model: network called on the (optionally DCT-transformed) input.
		optimizer: optimizer whose parameters are stepped and whose learning
			rate is then updated by ``update_lr_multistep``.
		nb_iter: current iteration index (also the TensorBoard step).
		total_iter, max_lr, mid_lr, min_lr: forwarded to ``update_lr_multistep``.

	Returns:
		(loss value as a Python float, optimizer, learning rate just set).
	"""
	# ---- forward pass ----
	model_input = h36m_motion_input.clone()
	if config.pre_dct:
		# Unpacking fails unless the input batch is 3-D.
		_b, _n, _c = h36m_motion_input.shape
		model_input = torch.matmul(dct_m[:, :, :config.motion.h36m_input_length], model_input.cuda())

	motion_pred = model(model_input.cuda())

	if config.post_dct:
		motion_pred = torch.matmul(idct_m[:, :config.motion.h36m_input_length, :], motion_pred)

	# Keep only the frames being supervised; optionally predict relative to
	# the last observed input frame.
	if config.residual_output:
		last_frame = h36m_motion_input[:, -1:].cuda()
		motion_pred = motion_pred[:, :config.motion.h36m_target_length] + last_frame
	else:
		motion_pred = motion_pred[:, :config.motion.h36m_target_length]

	# ---- losses ----
	b, n, c = h36m_motion_target.shape
	pred_joints = motion_pred.reshape(b, n, 22, 3)
	gt_joints = h36m_motion_target.cuda().reshape(b, n, 22, 3)
	# Mean Euclidean distance over every 3-vector.
	position_err = pred_joints.reshape(-1, 3) - gt_joints.reshape(-1, 3)
	loss = torch.mean(torch.norm(position_err, 2, 1))
	if config.use_relative_loss:
		# Same metric on the frame-to-frame differences.
		velocity_err = gen_velocity(pred_joints) - gen_velocity(gt_joints)
		loss = loss + torch.mean(torch.norm(velocity_err.reshape(-1, 3), 2, 1))
	else:
		loss = loss.mean()

	writer.add_scalar('Loss/angle', loss.detach().cpu().numpy(), nb_iter)

	# ---- parameter update: clear old grads, backprop, step ----
	optimizer.zero_grad()
	loss.backward()
	optimizer.step()

	# The learning rate chosen here takes effect from the next step on.
	optimizer, current_lr = update_lr_multistep(nb_iter, total_iter, max_lr, mid_lr, min_lr, optimizer)
	writer.add_scalar('LR/train', current_lr, nb_iter)

	return loss.item(), optimizer, current_lr

In [3]:
# Hyper-parameters for the RNN-based variants built below.
test_window_size=10
# test_state_size=2**int(np.log(config.motion.dim)/np.log(2))
test_state_size=config.motion.dim
test_num_layers=1

# Instantiate the architecture named by config.model.
if config.model == 'siMLPe':
	model = models.siMLPe(config)
elif config.model == 'siMLPe_RNN':
	model = models.siMLPe_RNN(config, rnn_state_size=test_state_size, rnn_layers=test_num_layers, num_blocks=config.motion_rnn.num_blocks, window_size=test_window_size)
elif config.model == 'Seq2SeqGRU':
	model = models.Seq2SeqGRU(config, state_size=test_state_size, num_layers=test_num_layers)
else:
	# Without this branch an unknown name would either raise a NameError on
	# `print(model)` or, worse, silently reuse a `model` left in the kernel
	# by an earlier run of this cell.
	raise ValueError(f"Unsupported config.model: {config.model!r} (expected 'siMLPe', 'siMLPe_RNN' or 'Seq2SeqGRU')")

# Summary of the model and of the config switches that shape it.
print(model)
total_params = sum(p.numel() for p in model.parameters())
print()
print("Window size:",test_window_size)
print("State size:",test_state_size)
print("Total count of parameters:",total_params)
print("Residual output? ",config.residual_output)
print("Use DCT? ",config.pre_dct)
print("Using recursive residual?",config.motion_rnn.recursive_residual)
print("Using LayerNorm?",config.motion_rnn.with_normalization)
print("Using spatial fc before temporal in RNN?",config.motion_rnn.local_spatial_fc)
print("Temporal layer in RNN:",config.motion_rnn.num_temp_blocks)

siMLPe_RNN(
  (rnn): SlidingRNN_v3(
    (mlp_mini): siMLPe_mini(
      (arr0): Rearrange('b n d -> b d n')
      (arr1): Rearrange('b d n -> b n d')
      (motion_mlp): TransMLP(
        (mlps): Sequential(
          (0): MLPblock(
            (fc0): Temporal_FC(
              (fc): Linear(in_features=11, out_features=11, bias=True)
            )
            (norm0): LN()
          )
        )
      )
      (motion_fc_in): Linear(in_features=66, out_features=66, bias=True)
      (motion_fc_out): Linear(in_features=66, out_features=66, bias=True)
      (temporal_merge_fc): Linear(in_features=11, out_features=1, bias=True)
    )
    (endecoder): GRU(66, 66, batch_first=True)
    (arr0): Rearrange('b n d -> b d n')
    (fc_decoder): Linear(in_features=66, out_features=66, bias=True)
    (spatial_fc): Linear(in_features=66, out_features=66, bias=True)
    (temporal_merge_fc): Linear(in_features=11, out_features=1, bias=True)
    (arr1): Rearrange('b d n -> b n d')
  )
)

Window size: 10
St

In [4]:
# Put the model in training mode and move it to the GPU before the optimizer is created.
model.train()
model.cuda()

# dataset = (T-by-C x_in, N-by-C x_out)
# The training target length is selected by overwriting the shared config field.
config.motion.h36m_target_length = config.motion.h36m_target_length_train
dataset = H36MDataset(config, 'train', config.data_aug)

# separate into batches (input, target) with size (batch_size,T,C) and (batch_size,N,C)
shuffle = True
sampler = None
dataloader = DataLoader(dataset, batch_size=config.batch_size,
						num_workers=config.num_workers, drop_last=True,
						sampler=sampler, shuffle=shuffle, pin_memory=True)

# Evaluation uses a deep copy of the config so that switching to the eval
# target length does not affect the training config above.
eval_config = copy.deepcopy(config)
eval_config.motion.h36m_target_length = eval_config.motion.h36m_target_length_eval
eval_dataset = H36MEval(eval_config, 'test')

# NOTE: `shuffle` and `sampler` are reassigned here for the evaluation loader.
shuffle = False
sampler = None
# separate into batches (input, target) with size (batch_size,T=50,K,3) and (batch_size,N=25,K,3)
eval_dataloader = DataLoader(eval_dataset, batch_size=128,
						num_workers=1, drop_last=False,
						sampler=sampler, shuffle=shuffle, pin_memory=True)


# initialize optimizer
# The initial learning rate is config.cos_lr_max; train_step rewrites it every iteration.
optimizer = torch.optim.Adam(model.parameters(),
							 lr=config.cos_lr_max,
							 weight_decay=config.weight_decay)

# Snapshot directory and log file; link_file links the log to config.link_log_file.
# NOTE(review): the recorded output shows `ln: ... File exists` from this step
# when the link was already present — presumably harmless; confirm.
ensure_dir(config.snapshot_dir)
logger = get_logger(config.log_file, 'train')
link_file(config.log_file, config.link_log_file)

# Dump the full effective config into the log for later reference.
print_and_log_info(logger, json.dumps(config, indent=4, sort_keys=True))

# continue training from a checkpoint
# NOTE(review): torch.load unpickles the file, so only trusted checkpoints
# should be loaded here.
if config.model_pth is not None :
	state_dict = torch.load(config.model_pth)
	model.load_state_dict(state_dict, strict=True)
	print_and_log_info(logger, "Loading model path from {} ".format(config.model_pth))

ln: failed to create symbolic link '/home/gjsk/siMLPe/exps/baseline_h36m/log/log_last.log': File exists


Training

In [None]:
# Running counters; avg_* accumulate over `config.print_every` iterations.
nb_iter = 0
avg_loss = 0
avg_lr = 0
current_lr = config.cos_lr_max

# Notebook-local overrides of the checkpoint/evaluation period and the iteration budget.
config.save_every = 1500
config.cos_lr_total_iters = 19500
# Reference values that each evaluation result is compared against, position by position.
baseline_results = [23.8,44.4,76.1,88.2,107.4,121.6,131.6,136.6]

# about 1 min per 1000 iterations
# try/finally so the accuracy log and the TensorBoard writer are flushed even
# when training is interrupted (e.g. "Interrupt kernel") or raises.
try:
	while (nb_iter + 1) < config.cos_lr_total_iters:

		for (h36m_motion_input, h36m_motion_target) in dataloader:

			loss, optimizer, current_lr = train_step(h36m_motion_input, h36m_motion_target, model, optimizer, nb_iter, config.cos_lr_total_iters, config.cos_lr_max, config.cos_lr_mid, config.cos_lr_min)
			avg_loss += loss
			avg_lr += current_lr

			# Periodic training summary, then reset the running sums.
			if (nb_iter + 1) % config.print_every ==  0 :
				avg_loss = avg_loss / config.print_every
				avg_lr = avg_lr / config.print_every

				print_and_log_info(logger, "Iter {} Summary: ".format(nb_iter + 1))
				print_and_log_info(logger, f"\t lr: {avg_lr} \t Training loss: {avg_loss}")
				avg_loss = 0
				avg_lr = 0

			# Periodic checkpoint + evaluation.
			if (nb_iter + 1) % config.save_every ==  0 :
				torch.save(model.state_dict(), config.snapshot_dir + '/model-iter-' + str(nb_iter + 1) + '.pth')
				model.eval()
				acc_tmp = test(eval_config, model, eval_dataloader)
				print(acc_tmp)
				# Difference to the baseline, paired by position (negative = lower than baseline).
				print([round(float(acc - ref), 2) for acc, ref in zip(acc_tmp, baseline_results)])
				acc_log.write(str(nb_iter + 1) + '\n')
				# One line per evaluation: values separated (and followed) by a space.
				acc_log.write(''.join(str(ii) + ' ' for ii in acc_tmp) + '\n')
				# Push the results to disk now so they survive a crash or interrupt.
				acc_log.flush()
				model.train()

			# Stop exactly at the iteration budget, even in the middle of an epoch.
			if (nb_iter + 1) == config.cos_lr_total_iters :
				break
			nb_iter += 1
		print("Iter number:",nb_iter)
finally:
	acc_log.flush()
	writer.close()

Iter number: 712
Iter number: 1424
[np.float64(17.6), np.float64(35.0), np.float64(66.1), np.float64(78.5), np.float64(97.7), np.float64(110.7), np.float64(120.5), np.float64(126.7)]
[-6.2, -9.4, -10.0, -9.7, -9.7, -10.9, -11.1, -9.9]
Iter number: 2136
Iter number: 2848
[np.float64(16.7), np.float64(34.0), np.float64(66.1), np.float64(79.6), np.float64(101.7), np.float64(118.3), np.float64(132.0), np.float64(141.1)]
[-7.1, -10.4, -10.0, -8.6, -5.7, -3.3, 0.4, 4.5]
Iter number: 3560
Iter number: 4272
[np.float64(15.5), np.float64(32.0), np.float64(62.4), np.float64(74.8), np.float64(94.0), np.float64(107.5), np.float64(118.0), np.float64(125.0)]
[-8.3, -12.4, -13.7, -13.4, -13.4, -14.1, -13.6, -11.6]
Iter number: 4984
Iter number: 5696
[np.float64(14.9), np.float64(31.3), np.float64(61.4), np.float64(73.6), np.float64(92.9), np.float64(107.0), np.float64(118.3), np.float64(125.7)]
[-8.9, -13.1, -14.7, -14.6, -14.5, -14.6, -13.3, -10.9]
Iter number: 6408
Iter number: 7120
[np.float64(14.

Manual test

In [None]:
# Manual, step-by-step version of train_step for debugging: the cells below
# reproduce its stages one at a time on a single batch.
from einops.layers.torch import Rearrange
# Axis-swap helpers between (b, n, d) and (b, d, n) layouts.
arr0 = Rearrange('b n d -> b d n')
arr1 = Rearrange('b d n -> b n d')

# NOTE: this resets the counters used by the training cell above.
nb_iter = 0
avg_loss = 0
avg_lr = 0

# Take one batch from the training loader.
(h36m_motion_input, h36m_motion_target) = next(iter(dataloader))

# loss, optimizer, current_lr = train_step(h36m_motion_input, h36m_motion_target, model, optimizer, nb_iter, config.cos_lr_total_iters, config.cos_lr_max, config.cos_lr_min)
# train_step(h36m_motion_input, h36m_motion_target, model, optimizer, nb_iter, total_iter, max_lr, min_lr)
# NOTE(review): the two commented calls above omit `mid_lr`, which the current
# train_step signature requires.
total_iter, max_lr, min_lr = config.cos_lr_total_iters, config.cos_lr_max, config.cos_lr_min

# DCT
# Same transform as the `config.pre_dct` branch of train_step, applied unconditionally here.
b,n,c = h36m_motion_input.shape
h36m_motion_input_ = h36m_motion_input.clone()
h36m_motion_input_ = torch.matmul(dct_m[:, :, :config.motion.h36m_input_length], h36m_motion_input_.cuda())

In [72]:
# Stand-alone SlidingRNN_v3 instance on the GPU for the manual forward pass below.
# NOTE: `model` is already imported as `models` in the first cell; the import
# is repeated here.
import model as models
test_window_size=10
# test_state_size=2**int(np.log(config.motion.dim)/np.log(2))
test_state_size=config.motion.dim
test_num_layers=1
test_model = models.SlidingRNN_v3(config, state_size=test_state_size, num_layers=test_num_layers, window_size=test_window_size).cuda()

In [73]:
# Hand-unrolled autoregressive forward pass through `test_model`, one output
# frame per loop iteration.
# NOTE(review): as recorded in the notebook, this cell ended with
# `RuntimeError: mat1 and mat2 shapes cannot be multiplied (16896x60 and 51x51)`.
# `fc_decoder` is applied to the whole `encoder_out` and the result is
# concatenated with all T frames of `x` along dim=1, so the sequence handed to
# `mlp_mini` is longer than the 51 its linear layer accepts. Possibly only the
# last encoder step was meant to be decoded (see the "[B,1,H]" comment below)
# — TODO confirm against SlidingRNN_v3.forward.
# motion_pred = model(h36m_motion_input_.cuda())
x = h36m_motion_input_.cuda()

B, T, C = x.size()
assert(C == test_model.config.motion.dim)

# Decoder initialization
last_input_frame = x[:, -1:, :]  # Last time step of input as initial input [B, 1, C]
last_rnn_input = last_input_frame.clone()

# size = [B, window_size, state_size]
# Initial window: the last `window_size` input frames.
out_frame_window = x[:, -test_model.window_size:, :]

# Buffer filled frame by frame; T output frames are generated.
output_frames = torch.zeros(B, T, C).cuda()
for frame_id in range(T):
	# Encoder: start with zero hidden states
	# (no initial state is passed, so the recurrent layer uses its default).
	if test_model.config.motion_rnn.use_gru:
		encoder_out, rnn_states = test_model.endecoder(out_frame_window)
	else:
		encoder_out, (rnn_states, cell_states) = test_model.endecoder(out_frame_window)

	# decode [B,1,H] to [B,1,C]
	_decoder_out = test_model.fc_decoder(encoder_out)
	mlp_input = torch.cat([x, _decoder_out], dim=1)
	
	if test_model.config.motion_rnn.recursive_residual:
		# Residual method 1 (recursive residual; same as in 2017 Martinez paper):
		new_frame = test_model.mlp_mini(mlp_input) + last_rnn_input
	else:
		# Residual method 2 (residual from the last input frame):
		new_frame = test_model.mlp_mini(mlp_input) + last_input_frame

	output_frames[:, frame_id:frame_id+1, :] = new_frame
	# Next input is current output
	last_rnn_input = new_frame
	# Sliding frame window
	# Drop the oldest frame and append the newly generated one.
	out_frame_window = torch.cat([out_frame_window[:, 1:, :], new_frame], dim=1)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (16896x60 and 51x51)

In [69]:
# Experiment: swap test_model.mlp_mini for a single MLPblock whose sequence
# length is the DCT input length plus one extra frame.
# NOTE: this mutates `test_model` in place; re-creating `test_model` undoes it.
import mlp
concatenated_dim = config.motion.h36m_input_length_dct+1
test_model.mlp_mini = mlp.MLPblock(dim=config.motion_mlp.hidden_dim,seq=concatenated_dim,use_norm=config.motion_mlp.with_normalization,use_spatial_fc=config.motion_mlp.spatial_fc_only,layernorm_axis=config.motion_mlp.norm_axis)

In [74]:
# Step-by-step pass through the sub-modules of test_model.mlp_mini, using the
# `mlp_input` left in the kernel by the manual forward-pass cell.
# NOTE(review): the recorded output of this cell is the same RuntimeError
# (16896x60 vs 51x51) as in that cell.
motion_feats = test_model.mlp_mini.motion_fc_in(mlp_input)
motion_feats = test_model.mlp_mini.arr0(motion_feats)

# MLP block input should be [B,C,T]
motion_feats = test_model.mlp_mini.motion_mlp(motion_feats)

motion_feats = test_model.mlp_mini.arr1(motion_feats)
motion_feats = test_model.mlp_mini.motion_fc_out(motion_feats)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (16896x60 and 51x51)

In [45]:
# Bare expression: displays the structure of mlp_mini's temporal MLP stack as the cell output.
test_model.mlp_mini.motion_mlp

TransMLP(
  (mlps): Sequential(
    (0): MLPblock(
      (fc0): Temporal_FC(
        (fc): Linear(in_features=50, out_features=50, bias=True)
      )
      (norm0): LN()
    )
  )
)

In [None]:
# Manual walkthrough of the tail of train_step: IDCT -> residual -> loss -> optimizer step.
# Prerequisites left in the kernel by earlier cells:
#   * `motion_pred`: the model output for `h36m_motion_input_`
#     (e.g. motion_pred = model(h36m_motion_input_.cuda()))
#   * `h36m_motion_input`, `h36m_motion_target`, `total_iter`, `max_lr`, `min_lr`
# Intermediate results use their own names so the inputs are not overwritten.

# IDCT
pred_frames = torch.matmul(idct_m[:, :config.motion.h36m_input_length, :], motion_pred)

# add residual
if config.residual_output:
	offset = h36m_motion_input[:, -1:].cuda()
	pred_frames = pred_frames[:, :config.motion.h36m_target_length] + offset
else:
	pred_frames = pred_frames[:, :config.motion.h36m_target_length]

# calc losses
b,n,c = h36m_motion_target.shape
pred_flat = pred_frames.reshape(b,n,22,3).reshape(-1,3)
target_flat = h36m_motion_target.cuda().reshape(b,n,22,3).reshape(-1,3)
loss = torch.mean(torch.norm(pred_flat - target_flat, 2, 1))
# add position loss and velocity loss
if config.use_relative_loss:
	dmotion_pred = gen_velocity(pred_flat.reshape(b,n,22,3))
	dmotion_gt = gen_velocity(target_flat.reshape(b,n,22,3))
	dloss = torch.mean(torch.norm((dmotion_pred - dmotion_gt).reshape(-1,3), 2, 1))
	loss = loss + dloss
else:
	loss = loss.mean()

writer.add_scalar('Loss/angle', loss.detach().cpu().numpy(), nb_iter)

# reset gradients
optimizer.zero_grad()
# compute gradients by backpropagation
loss.backward()
# update params
optimizer.step()
# update_lr_multistep takes (nb_iter, total_iter, max_lr, mid_lr, min_lr, optimizer);
# the middle rate comes from the config, as in the training cell.
optimizer, current_lr = update_lr_multistep(nb_iter, total_iter, max_lr, config.cos_lr_mid, min_lr, optimizer)
writer.add_scalar('LR/train', current_lr, nb_iter)

# `return` is only valid inside a function; ending the cell with the values
# displays them as the cell output instead.
loss.item(), current_lr