In [39]:
# Code to run in bash console
# cd exps/baseline_h36m
%load_ext autoreload
%autoreload 2

import argparse
import os, sys
import json
import math
import numpy as np
import copy

from config import config

import model as models
from datasets.h36m import H36MDataset
from utils.logger import get_logger, print_and_log_info
from utils.pyt_utils import link_file, ensure_dir
from datasets.h36m_eval import H36MEval

from custom_test import test

import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# cuBLAS workspace setting needed for deterministic CUDA matmuls when torch.use_deterministic_algorithms(True) is enabled
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Experiment options are declared with argparse but parsed from the fixed
# string `argString` below instead of sys.argv, so the cell runs in a notebook.
import shlex

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--exp-name', type=str, default=None, help='=exp name')
parser.add_argument('--seed', type=int, default=888, help='=seed')
parser.add_argument('--temporal-only', action='store_true', help='=temporal only')
parser.add_argument('--layer-norm-axis', type=str, default='spatial', help='=layernorm axis')
# default is False for 'store_true'
parser.add_argument('--with-normalization', action='store_true', help='=use layernorm')
parser.add_argument('--spatial-fc', action='store_true', help='=use only spatial fc')
parser.add_argument('--num', type=int, default=64, help='=num of blocks')
parser.add_argument('--weight', type=float, default=1., help='=loss weight')

# pass argument without command line
argString = '--seed 888 --exp-name baseline.txt --layer-norm-axis spatial --with-normalization --num 48'
args = parser.parse_args(shlex.split(argString))

torch.use_deterministic_algorithms(True)
# Accuracy log, opened in append mode so repeated runs accumulate in one file.
# Line-buffered (buffering=1): the handle stays open across cells and is not
# closed in this cell, so each completed line is pushed to disk immediately
# instead of waiting in a block buffer that an interrupted kernel would lose.
acc_log = open(args.exp_name, 'a', buffering=1)
torch.manual_seed(args.seed)
writer = SummaryWriter()

# Copy the parsed options into the shared `config` object, which is what the
# model constructors receive later in the notebook.
config.motion_fc_in.temporal_fc = args.temporal_only
config.motion_fc_out.temporal_fc = args.temporal_only
config.motion_mlp.norm_axis = args.layer_norm_axis
config.motion_mlp.spatial_fc_only = args.spatial_fc
config.motion_mlp.with_normalization = args.with_normalization
config.motion_mlp.num_layers = args.num

# config.motion_rnn.with_normalization = args.with_normalization

# Record the seed at the top of this run's section of the accuracy log.
acc_log.write('Seed : ' + str(args.seed) + '\n')

def get_dct_matrix(N):
	"""Build the N x N orthonormal DCT-II matrix and its inverse.

	Entry (k, i) equals w_k * cos(pi * (i + 1/2) * k / N), where
	w_0 = sqrt(1/N) and w_k = sqrt(2/N) for k > 0.

	Returns:
		(dct_m, idct_m): two (N, N) numpy arrays; idct_m is obtained with
		np.linalg.inv(dct_m).
	"""
	dct_m = np.eye(N)
	for freq in range(N):
		# The first (constant) row uses a different normalisation factor.
		scale = np.sqrt(1 / N) if freq == 0 else np.sqrt(2 / N)
		for pos in range(N):
			dct_m[freq, pos] = scale * np.cos(np.pi * (pos + 1 / 2) * freq / N)
	return dct_m, np.linalg.inv(dct_m)

# DCT and inverse-DCT matrices as float32 CUDA tensors with a leading axis of
# length 1 so they broadcast over the batch; size: (1,T,T)
dct_m, idct_m = (
	torch.tensor(basis).float().cuda().unsqueeze(0)
	for basis in get_dct_matrix(config.motion.h36m_input_length_dct)
)

def update_lr_multistep(nb_iter, total_iter, max_lr, mid_lr=None, min_lr=None, optimizer=None) :
	"""Set the optimizer's learning rate from a three-stage step schedule.

	The rate is ``max_lr`` while ``nb_iter < 10000``, ``mid_lr`` while
	``nb_iter < 30000`` and ``min_lr`` afterwards. The chosen value is written
	to the ``"lr"`` entry of every group in ``optimizer.param_groups``.

	The earlier signature had no ``mid_lr`` parameter even though the body read
	it and ``train_step`` passes it positionally. Both call forms are accepted:

	* ``(nb_iter, total_iter, max_lr, mid_lr, min_lr, optimizer)``
	* ``(nb_iter, total_iter, max_lr, min_lr, optimizer)`` (legacy, no mid rate)

	Args:
		nb_iter: current iteration index.
		total_iter: unused; kept so existing callers do not break.
		max_lr, mid_lr, min_lr: learning rates of the three stages.
		optimizer: object exposing ``param_groups`` (list of dicts).

	Returns:
		(optimizer, current_lr)

	Raises:
		TypeError: if no optimizer was supplied.
		ValueError: if the middle stage is reached but no ``mid_lr`` was given.
	"""
	if optimizer is None:
		# Legacy positional form: min_lr and optimizer arrived one slot early
		# (in mid_lr and min_lr respectively), so shift them back into place.
		mid_lr, min_lr, optimizer = None, mid_lr, min_lr
	if optimizer is None:
		raise TypeError("update_lr_multistep() requires an optimizer")

	if nb_iter < 10000:
		current_lr = max_lr
	elif nb_iter < 30000:
		if mid_lr is None:
			raise ValueError("mid_lr is required for iterations in [10000, 30000)")
		current_lr = mid_lr
	else:
		current_lr = min_lr

	for param_group in optimizer.param_groups:
		param_group["lr"] = current_lr

	return optimizer, current_lr

def gen_velocity(m):
	"""Return first-order differences of ``m`` along axis 1: ``m[:, t+1] - m[:, t]``.

	The result has one fewer entry along axis 1 than the input.
	"""
	later, earlier = m[:, 1:], m[:, :-1]
	return later - earlier

def train_step(h36m_motion_input, h36m_motion_target, model, optimizer, nb_iter, total_iter, max_lr, mid_lr, min_lr) :
	"""Run one training iteration on a batch and return the scalar loss.

	Steps, in order: optional DCT of the input (``config.pre_dct``), forward
	pass, optional inverse DCT (``config.post_dct``), truncation along axis 1
	to ``config.motion.h36m_target_length`` with an optional residual from the
	last input entry (``config.residual_output``), loss, backward pass,
	optimizer step and learning-rate update.

	The loss is the mean L2 norm over rows of the prediction error reshaped to
	(-1, 3). With ``config.use_relative_loss`` the same norm of the differences
	along axis 1 is added. Loss and learning rate are logged to the
	module-level ``writer`` under 'Loss/angle' and 'LR/train'.

	Returns:
		(loss as a Python float, optimizer, learning rate returned by
		``update_lr_multistep``).
	"""
	# --- forward pass -------------------------------------------------------
	if config.pre_dct:
		# Unpacking also rejects inputs that are not rank 3.
		b, n, c = h36m_motion_input.shape
		net_input = torch.matmul(
			dct_m[:, :, :config.motion.h36m_input_length],
			h36m_motion_input.clone().cuda(),
		)
	else:
		net_input = h36m_motion_input.clone()

	motion_pred = model(net_input.cuda())

	if config.post_dct:
		motion_pred = torch.matmul(idct_m[:, :config.motion.h36m_input_length, :], motion_pred)

	# Keep only the part of the prediction that is compared with the target.
	motion_pred = motion_pred[:, :config.motion.h36m_target_length]
	if config.residual_output:
		# The network output is treated as an offset from the last input entry.
		motion_pred = motion_pred + h36m_motion_input[:, -1:].cuda()

	# --- losses -------------------------------------------------------------
	# Rows of length 3 (presumably 22 joints x 3 coordinates per frame).
	b, n, c = h36m_motion_target.shape
	pred_rows = motion_pred.reshape(b, n, 22, 3).reshape(-1, 3)
	target_rows = h36m_motion_target.cuda().reshape(b, n, 22, 3).reshape(-1, 3)
	loss = torch.mean(torch.norm(pred_rows - target_rows, p=2, dim=1))

	if config.use_relative_loss:
		# Extra term on differences between consecutive entries along axis 1.
		pred_vel = gen_velocity(pred_rows.reshape(b, n, 22, 3))
		target_vel = gen_velocity(target_rows.reshape(b, n, 22, 3))
		vel_error = (pred_vel - target_vel).reshape(-1, 3)
		loss = loss + torch.mean(torch.norm(vel_error, p=2, dim=1))
	else:
		loss = loss.mean()

	writer.add_scalar('Loss/angle', loss.detach().cpu().numpy(), nb_iter)

	# --- optimisation -------------------------------------------------------
	optimizer.zero_grad()
	loss.backward()
	optimizer.step()
	# The schedule is applied after the step, so it takes effect from the next call.
	optimizer, current_lr = update_lr_multistep(nb_iter, total_iter, max_lr, mid_lr, min_lr, optimizer)
	writer.add_scalar('LR/train', current_lr, nb_iter)

	return loss.item(), optimizer, current_lr

In [43]:
# Hyper-parameters passed to the RNN-based variants below.
test_window_size=10
test_state_size=int(config.motion.dim)

# Instantiate the network selected by config.model.
if config.model == 'siMLPe':
	model = models.siMLPe(config)
elif config.model == 'siMLPe_RNN':
	model = models.siMLPe_RNN(config, rnn_state_size=test_state_size, rnn_layers=config.motion_rnn.num_layers, num_blocks=config.motion_rnn.num_blocks, window_size=test_window_size)
elif config.model == 'Seq2SeqGRU':
	model = models.Seq2SeqGRU(config, state_size=test_state_size, num_layers=config.motion_rnn.num_layers)
else:
	# Without this branch an unrecognised name left `model` unbound on a fresh
	# kernel, or silently kept a model built by an earlier run of this cell.
	raise ValueError(f"Unknown config.model: {config.model!r}")

# Summary of the architecture and of the config switches that affect it.
print(model)
total_params = sum(p.numel() for p in model.parameters())
print()
print("Window size:",test_window_size)
print("State size:",test_state_size)
print("Total count of parameters:",total_params)
print("Residual output? ",config.residual_output)
print("Use DCT? ",config.pre_dct)
print("Using recursive residual?",config.motion_rnn.recursive_residual)
print("Using LayerNorm?",config.motion_rnn.with_normalization)
print("Using spatial fc before temporal in RNN?",config.motion_rnn.local_spatial_fc)
print("Temporal layer in RNN:",config.motion_rnn.num_temp_blocks)

siMLPe_RNN(
  (rnn): SlidingRNN(
    (endecoder): GRU(66, 66, batch_first=True)
    (temporal_fc1): Linear(in_features=10, out_features=10, bias=True)
    (temporal_fc): Linear(in_features=10, out_features=1, bias=True)
    (spatial_fc): Linear(in_features=66, out_features=66, bias=True)
    (arr0): Rearrange('b n d -> b d n')
    (spatial_norm): Identity()
  )
)

Window size: 10
State size: 66
Total count of parameters: 31075
Residual output?  False
Use DCT?  False
Using recursive residual? True
Using LayerNorm? False
Using spatial fc before temporal in RNN? True
Temporal layer in RNN: 1


In [44]:
# Put the network in training mode and move its parameters to the GPU.
model.train()
model.cuda()

# dataset = (T-by-C x_in, N-by-C x_out)
# The training target length is written into the shared config before the
# training dataset is built and before the deep copy for evaluation is taken.
config.motion.h36m_target_length = config.motion.h36m_target_length_train
dataset = H36MDataset(config, 'train', config.data_aug)

# separate into batches (input, target) with size (batch_size,T,C) and (batch_size,N,C)
shuffle = True
sampler = None
# drop_last=True: an incomplete final batch is discarded.
dataloader = DataLoader(dataset, batch_size=config.batch_size,
						num_workers=config.num_workers, drop_last=True,
						sampler=sampler, shuffle=shuffle, pin_memory=True)

# Independent copy of the config so that switching to the evaluation target
# length does not overwrite the training value set above.
eval_config = copy.deepcopy(config)
eval_config.motion.h36m_target_length = eval_config.motion.h36m_target_length_eval
eval_dataset = H36MEval(eval_config, 'test')

# Evaluation batches keep their order and the final partial batch.
shuffle = False
sampler = None
# separate into batches (input, target) with size (batch_size,T=50,K,3) and (batch_size,N=25,K,3)
eval_dataloader = DataLoader(eval_dataset, batch_size=128,
						num_workers=1, drop_last=False,
						sampler=sampler, shuffle=shuffle, pin_memory=True)


# initialize optimizer
# The initial learning rate is config.cos_lr_max.
optimizer = torch.optim.Adam(model.parameters(),
							 lr=config.cos_lr_max,
							 weight_decay=config.weight_decay)

# Snapshot directory and file logger used by the training loop.
ensure_dir(config.snapshot_dir)
logger = get_logger(config.log_file, 'train')
link_file(config.log_file, config.link_log_file)

# Record the full configuration at the start of the log.
print_and_log_info(logger, json.dumps(config, indent=4, sort_keys=True))

# continue training from a checkpoint
# NOTE(review): torch.load unpickles the file, so only point config.model_pth
# at checkpoints from a trusted source.
if config.model_pth is not None :
	state_dict = torch.load(config.model_pth)
	# strict=True: the checkpoint keys must match the model's keys exactly.
	model.load_state_dict(state_dict, strict=True)
	print_and_log_info(logger, "Loading model path from {} ".format(config.model_pth))

ln: failed to create symbolic link '/home/gjsk/siMLPe/exps/baseline_h36m/log/log_last.log': File exists


Training

In [None]:
# Iteration counter and running sums for the periodic summary line.
nb_iter = avg_loss = avg_lr = 0
current_lr = config.cos_lr_max

# Notebook-level overrides of the checkpoint interval and the schedule length.
config.save_every = 1500
config.cos_lr_total_iters = 19500
# Baseline values subtracted from each evaluation result below.
baseline_results = [23.8,44.4,76.1,88.2,107.4,121.6,131.6,136.6]

# about 1 min per 1000 iterations
while (nb_iter + 1) < config.cos_lr_total_iters:

	for h36m_motion_input, h36m_motion_target in dataloader:
		# 1-based count of iterations completed once this step has run.
		iters_done = nb_iter + 1

		loss, optimizer, current_lr = train_step(
			h36m_motion_input, h36m_motion_target, model, optimizer, nb_iter,
			config.cos_lr_total_iters, config.cos_lr_max, config.cos_lr_mid, config.cos_lr_min)
		avg_loss += loss
		avg_lr += current_lr

		# Periodic summary: report the means since the last report, then reset.
		if iters_done % config.print_every == 0:
			avg_loss /= config.print_every
			avg_lr /= config.print_every

			print_and_log_info(logger, f"Iter {iters_done} Summary: ")
			print_and_log_info(logger, f"\t lr: {avg_lr} \t Training loss: {avg_loss}")
			avg_loss = avg_lr = 0

		# Periodic checkpoint followed by an evaluation pass.
		if iters_done % config.save_every == 0:
			torch.save(model.state_dict(), f"{config.snapshot_dir}/model-iter-{iters_done}.pth")
			model.eval()
			acc_tmp = test(eval_config, model, eval_dataloader)
			print(acc_tmp)
			# Difference to the baseline for the first eight entries.
			print([round(float(acc_tmp[idx] - baseline_results[idx]), 2) for idx in range(8)])
			# Two log lines per evaluation: the iteration, then the results.
			acc_log.write(f"{iters_done}\n")
			acc_log.write(''.join(str(value) + ' ' for value in acc_tmp) + '\n')
			model.train()

		# Stop mid-epoch once the iteration budget is used up; nb_iter is left
		# un-incremented so the outer while condition also terminates.
		if iters_done == config.cos_lr_total_iters:
			break
		nb_iter += 1
	print("Iter number:",nb_iter)

writer.close()

Iter number: 688
Iter number: 1376
[np.float64(17.5), np.float64(34.4), np.float64(63.6), np.float64(75.5), np.float64(94.7), np.float64(108.4), np.float64(119.7), np.float64(127.0)]
[-6.3, -10.0, -12.5, -12.7, -12.7, -13.2, -11.9, -9.6]
Iter number: 2064
Iter number: 2752
[np.float64(16.5), np.float64(32.9), np.float64(62.0), np.float64(74.0), np.float64(93.2), np.float64(106.8), np.float64(117.9), np.float64(125.1)]
[-7.3, -11.5, -14.1, -14.2, -14.2, -14.8, -13.7, -11.5]
Iter number: 3440
Iter number: 4128
[np.float64(16.2), np.float64(32.5), np.float64(61.5), np.float64(73.6), np.float64(92.9), np.float64(107.1), np.float64(119.0), np.float64(126.5)]
[-7.6, -11.9, -14.6, -14.6, -14.5, -14.5, -12.6, -10.1]
Iter number: 4816
Iter number: 5504
[np.float64(15.9), np.float64(32.1), np.float64(61.1), np.float64(73.3), np.float64(92.9), np.float64(106.8), np.float64(118.4), np.float64(126.0)]
[-7.9, -12.3, -15.0, -14.9, -14.5, -14.8, -13.2, -10.6]
Iter number: 6192
Iter number: 6880
[np.fl

Manual test

In [None]:
from einops.layers.torch import Rearrange
# Layers that swap the last two axes of a 3-D tensor, in each direction.
arr0 = Rearrange('b n d -> b d n')
arr1 = Rearrange('b d n -> b n d')

nb_iter = avg_loss = avg_lr = 0

# Take a single training batch to step through by hand.
h36m_motion_input, h36m_motion_target = next(iter(dataloader))

# The cells below unroll this call manually:
# loss, optimizer, current_lr = train_step(h36m_motion_input, h36m_motion_target, model, optimizer, nb_iter, config.cos_lr_total_iters, config.cos_lr_max, config.cos_lr_min)
total_iter = config.cos_lr_total_iters
max_lr = config.cos_lr_max
min_lr = config.cos_lr_min

# DCT
b,n,c = h36m_motion_input.shape
h36m_motion_input_ = torch.matmul(
	dct_m[:, :, :config.motion.h36m_input_length],
	h36m_motion_input.clone().cuda(),
)

In [None]:
import model as models
# Stand-alone SlidingGRU (window_size=5) for the manual walkthrough below.
# Earlier alternative, kept for reference:
# model = models.siMLPe_RNN(config, rnn_state_size=int(config.motion.dim*1.5), rnn_layers=config.motion_rnn.num_layers, num_blocks=config.motion_rnn.num_blocks).cuda()
test_model = models.SlidingGRU(
	config,
	state_size=int(config.motion.dim),
	num_layers=config.motion_rnn.num_layers,
	window_size=5,
).cuda()

In [None]:
# Hand-unrolled forward pass of test_model, replacing the single call:
# motion_pred = model(h36m_motion_input_.cuda())
x = h36m_motion_input_.cuda()

# x is rank 3; its last axis must equal the configured motion dimension.
B, T, C = x.size()
assert(C == test_model.config.motion.dim)

# Encoder: start with zero hidden states
encoder_out, rnn_states = test_model.endecoder(x)  # hidden: [num_layers, B, state_size]

# Decoder initialization
last_input_frame = x[:, -1:, :]  # Last time step of input as initial input [B, 1, C]
decoder_input = last_input_frame.clone()

# Last window_size encoder outputs seed the sliding window.
# size = [B, window_size, state_size]
encoder_window = encoder_out[:, -test_model.window_size:, :]

# One output slot per input time step, filled in by the loop below.
output_frames = torch.zeros(B, T, C).cuda()
for frame_id in range(T):
	# Decoder: # [B, 1, C]
	# The same recurrent module is reused as decoder, continuing from the
	# current hidden state.
	decoder_out, rnn_states = test_model.endecoder(decoder_input, rnn_states)

	# Sliding window
	# Drop the oldest entry and append the newest decoder output.
	encoder_window = torch.cat([encoder_window[:, 1:, :], decoder_out], dim=1)
	# temporal_fc is applied with the last two axes swapped, then swapped back.
	_decoder_out = test_model.arr0(test_model.temporal_fc(test_model.arr0(encoder_window)))
	_decoder_out = test_model.spatial_fc(_decoder_out)

	# decoder_out = test_model.temporal_fc(decoder_out)  # [B, 1, C]
	# decoder_out = test_model.fc1(decoder_out) + decoder_out  # [B, 1, C]

	if test_model.config.motion_rnn.recursive_residual:
		# Residual method 1 (recursive residual; same as in 2017 Martinez paper):
		new_frame = test_model.spatial_norm(_decoder_out) + decoder_input
	else:
		# Residual method 2 (residual from the last input frame):
		new_frame = test_model.spatial_norm(_decoder_out) + last_input_frame

	# NOTE(review): the recorded run of this cell stops here with a RuntimeError:
	# new_frame is [256, 5, 66] but the target slice is [256, 1, 66].
	# _decoder_out still carries the window axis (length 5), so temporal_fc
	# apparently does not reduce the window to a single step — confirm against
	# the SlidingGRU definition before relying on this cell.
	output_frames[:, frame_id:frame_id+1, :] = new_frame
	decoder_input = new_frame  # Next input is current output

RuntimeError: The expanded size of the tensor (1) must match the existing size (5) at non-singleton dimension 1.  Target sizes: [256, 1, 66].  Tensor sizes: [256, 5, 66]

In [None]:
# Inspect the shape left over from the failing decoder cell above.
_decoder_out.shape

torch.Size([256, 5, 66])

In [None]:
# IDCT
# NOTE(review): `motion_pred` is not assigned in any of the manual-test cells
# visible above (the `model(...)` call there is commented out) — presumably it
# should come from the hand-unrolled decoder output; confirm before running.
motion_pred = torch.matmul(idct_m[:, :config.motion.h36m_input_length, :], motion_pred)

# add residual
if config.residual_output:
	offset = h36m_motion_input[:, -1:].cuda()
	motion_pred = motion_pred[:, :config.motion.h36m_target_length] + offset
else:
	motion_pred = motion_pred[:, :config.motion.h36m_target_length]

# calc losses
# Mean L2 norm over rows of length 3 of the prediction error.
b,n,c = h36m_motion_target.shape
motion_pred = motion_pred.reshape(b,n,22,3).reshape(-1,3)
h36m_motion_target = h36m_motion_target.cuda().reshape(b,n,22,3).reshape(-1,3)
loss = torch.mean(torch.norm(motion_pred - h36m_motion_target, 2, 1))
# add position loss and velocity loss
if config.use_relative_loss:
	motion_pred = motion_pred.reshape(b,n,22,3)
	dmotion_pred = gen_velocity(motion_pred)
	motion_gt = h36m_motion_target.reshape(b,n,22,3)
	dmotion_gt = gen_velocity(motion_gt)
	dloss = torch.mean(torch.norm((dmotion_pred - dmotion_gt).reshape(-1,3), 2, 1))
	loss = loss + dloss
else:
	loss = loss.mean()

writer.add_scalar('Loss/angle', loss.detach().cpu().numpy(), nb_iter)

# reset gradients
optimizer.zero_grad()
# compute gradients by backpropagation
loss.backward()
# update params
optimizer.step()
optimizer, current_lr = update_lr_multistep(nb_iter, total_iter, max_lr, min_lr, optimizer)
writer.add_scalar('LR/train', current_lr, nb_iter)

# Show the values train_step would return. A bare `return` is a SyntaxError at
# cell level, so the tuple is left as the cell's final expression instead.
loss.item(), optimizer, current_lr