In [21]:
# Code to run in bash console
# cd exps/baseline_h36m
%load_ext autoreload
%autoreload 2

import argparse
import os, sys
import json
import math
import numpy as np
import copy

from config import config

import model as models
from datasets.h36m import H36MDataset
from utils.logger import get_logger, print_and_log_info
from utils.pyt_utils import link_file, ensure_dir
from datasets.h36m_eval import H36MEval

from custom_test import test

import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# Run "conda install -c conda-forge ipywidgets" for tqdm to work in notebook
from tqdm.notebook import tqdm

# cuda setting to make result deterministic
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

# torch.cuda.empty_cache()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [22]:
# Experiment options. They are declared as command-line flags but parsed from
# a hard-coded string below, because the notebook has no real argv.
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--exp-name', type=str, default=None, help='=exp name')
parser.add_argument('--seed', type=int, default=888, help='=seed')
parser.add_argument('--temporal-only', action='store_true', help='=temporal only')
parser.add_argument('--layer-norm-axis', type=str, default='spatial', help='=layernorm axis')
# default is False for 'store_true'
parser.add_argument('--with-normalization', action='store_true', help='=use layernorm')
parser.add_argument('--spatial-fc', action='store_true', help='=use only spatial fc')
parser.add_argument('--num', type=int, default=64, help='=num of blocks')
# NOTE(review): --weight is parsed but not read anywhere in the visible notebook.
parser.add_argument('--weight', type=float, default=1., help='=loss weight')

# pass argument without command line
import shlex
argString = '--seed 888 --exp-name baseline.txt --layer-norm-axis spatial --with-normalization --num 48'
args = parser.parse_args(shlex.split(argString))

torch.use_deterministic_algorithms(True)
# Accuracy log, opened in append mode at the path given by --exp-name.
acc_log = open(args.exp_name, 'a')
torch.manual_seed(args.seed)
writer = SummaryWriter()

# Copy the parsed options onto the shared config object.
config.motion_fc_in.temporal_fc = args.temporal_only
config.motion_fc_out.temporal_fc = args.temporal_only
config.motion_mlp.norm_axis = args.layer_norm_axis
config.motion_mlp.spatial_fc_only = args.spatial_fc
config.motion_mlp.with_normalization = args.with_normalization
config.motion_mlp.num_layers = args.num

# config.motion_rnn.with_normalization = args.with_normalization

# Write the run header and flush immediately: the handle stays open for the
# whole session, so an interrupted kernel would otherwise lose buffered text.
acc_log.write(f'Seed : {args.seed}\n')
acc_log.flush()

def get_dct_matrix(N):
	"""Build the N-by-N DCT basis and its inverse.

	Entry (k, i) is w_k * cos(pi * (i + 1/2) * k / N), where
	w_0 = sqrt(1 / N) and w_k = sqrt(2 / N) for k > 0.

	Args:
		N: number of rows and columns of the basis.

	Returns:
		(dct_m, idct_m): the basis and its numerical inverse
		(computed with np.linalg.inv), both of shape (N, N).
	"""
	dct_m = np.eye(N)
	for k in range(N):
		# The row scale depends only on k, so pick it once per row.
		scale = np.sqrt(1 / N) if k == 0 else np.sqrt(2 / N)
		for i in range(N):
			dct_m[k, i] = scale * np.cos(np.pi * (i + 1 / 2) * k / N)
	return dct_m, np.linalg.inv(dct_m)

# size: (1,T,T)
# train_step reads dct_m when config.pre_dct is set and idct_m when
# config.post_dct is set. The two flags are independent, so build both
# matrices whenever either one is enabled.
if config.pre_dct or config.post_dct:
	dct_m, idct_m = get_dct_matrix(config.motion.h36m_input_length_dct)
	# float32 on the GPU, with a leading broadcast dimension added by unsqueeze(0).
	dct_m = torch.tensor(dct_m).float().cuda().unsqueeze(0)
	idct_m = torch.tensor(idct_m).float().cuda().unsqueeze(0)

def update_lr_multistep(nb_iter, total_iter, max_lr, mid_lr, min_lr, optimizer, mid_start=10000, min_start=30000):
	"""Set a three-stage step learning rate on every parameter group.

	Args:
		nb_iter: current iteration index.
		total_iter: unused; kept so existing call sites keep working.
		max_lr: rate used while nb_iter < mid_start.
		mid_lr: rate used while mid_start <= nb_iter < min_start.
		min_lr: rate used once nb_iter >= min_start.
		optimizer: object exposing `param_groups`, an iterable of dicts
			whose "lr" entry is overwritten.
		mid_start: iteration at which the rate drops to mid_lr
			(default 10000, the previously hard-coded value).
		min_start: iteration at which the rate drops to min_lr
			(default 30000, the previously hard-coded value).

	Returns:
		(optimizer, current_lr): the same optimizer object and the rate
		that was just applied.
	"""
	if nb_iter < mid_start:
		current_lr = max_lr
	elif nb_iter < min_start:
		current_lr = mid_lr
	else:
		current_lr = min_lr

	for param_group in optimizer.param_groups:
		param_group["lr"] = current_lr

	return optimizer, current_lr

def gen_velocity(m):
	"""Return the first-order difference of `m` along axis 1.

	Element [:, t] of the result is m[:, t + 1] - m[:, t], so axis 1 of the
	result is one shorter than axis 1 of `m`.
	"""
	later, earlier = m[:, 1:], m[:, :-1]
	return later - earlier

def train_step(h36m_motion_input, h36m_motion_target, model, optimizer, nb_iter, total_iter, max_lr, mid_lr, min_lr) :
	"""Run one optimisation step on a batch and return the loss and LR.

	Reads the notebook-level `config`, `dct_m` / `idct_m` and `writer`.

	Args:
		h36m_motion_input: input motion batch, indexed as [batch, frame, channel].
		h36m_motion_target: target motion batch whose shape unpacks as
			(b, n, c); c is split into (c // 3, 3) joint coordinates.
		model: network called on the (optionally DCT-transformed) input.
		optimizer: optimizer that is zeroed, stepped and then has its
			learning rate rewritten by `update_lr_multistep`.
		nb_iter: current iteration index, used for logging and LR scheduling.
		total_iter, max_lr, mid_lr, min_lr: forwarded to `update_lr_multistep`.

	Returns:
		(loss_value, optimizer, current_lr), where loss_value is a Python float.
	"""
	if config.pre_dct:
		# matmul allocates its own result, so no defensive clone is needed here.
		h36m_motion_input_ = torch.matmul(dct_m[:, :, :config.motion.h36m_input_length], h36m_motion_input.cuda())
	else:
		# Clone so the model is never handed the caller's tensor itself.
		h36m_motion_input_ = h36m_motion_input.clone()

	motion_pred = model(h36m_motion_input_.cuda())

	if config.post_dct:
		motion_pred = torch.matmul(idct_m[:, :config.motion.h36m_input_length, :], motion_pred)

	# Keep only the frames that are compared against the target.
	motion_pred = motion_pred[:, :config.motion.h36m_target_length]
	if config.residual_output:
		# The prediction is an offset from the last input frame.
		motion_pred = motion_pred + h36m_motion_input[:, -1:].cuda()

	# calc losses: mean Euclidean distance per 3-D joint
	b, n, c = h36m_motion_target.shape
	num_joints = c // 3  # derived from the channel size instead of a fixed 22
	motion_pred = motion_pred.reshape(b, n, num_joints, 3)
	motion_gt = h36m_motion_target.cuda().reshape(b, n, num_joints, 3)
	loss = torch.mean(torch.norm((motion_pred - motion_gt).reshape(-1, 3), 2, 1))

	# add the same distance on frame-to-frame differences (velocity loss)
	if config.use_relative_loss:
		dmotion_pred = gen_velocity(motion_pred)
		dmotion_gt = gen_velocity(motion_gt)
		dloss = torch.mean(torch.norm((dmotion_pred - dmotion_gt).reshape(-1, 3), 2, 1))
		loss = loss + dloss

	writer.add_scalar('Loss/angle', loss.detach().cpu().numpy(), nb_iter)

	# reset gradients
	optimizer.zero_grad()
	# compute gradients by backpropagation
	loss.backward()
	# update params
	optimizer.step()
	# The new rate is written after the step, so it applies from the next call on.
	optimizer, current_lr = update_lr_multistep(nb_iter, total_iter, max_lr, mid_lr, min_lr, optimizer)
	writer.add_scalar('LR/train', current_lr, nb_iter)

	return loss.item(), optimizer, current_lr

In [23]:
# Build the network named by config.model. An unknown name raises here, so a
# stale `model` left in the kernel by an earlier run can never be reused silently.
if config.model == 'siMLPe':
	model = models.siMLPe(config)
elif config.model == 'siMLPe_RNN':
	model = models.SlidingRNN_v2(config)
elif config.model == 'Seq2SeqGRU':
	model = models.Seq2SeqGRU(config)
else:
	raise ValueError(f"Unknown config.model: {config.model!r}")

# Print the module tree, the parameter count and the config switches for this run.
print(model)
total_params = sum(p.numel() for p in model.parameters())
print()
print("Total count of parameters:",total_params)
print("Residual output? ",config.residual_output)
print("Use DCT? ",config.pre_dct, config.post_dct)
print("Using recursive residual?",config.motion_rnn.recursive_residual)
# print("Using LayerNorm?",config.motion_rnn.with_normalization) (deprecated)
print("Using spatial fc before temporal in RNN?",config.motion_rnn.local_spatial_fc)
print("Temporal layer in RNN:",config.motion_rnn.num_temp_blocks)
# print("Sliding long term encoder in RNN? ",config.motion_rnn.sliding_long_term) (deprecated)
print("History term window size: ",config.motion_rnn.history_window_size)
print("Short term window size: ",config.motion_rnn.short_term_window_size)
print("Encode history? ",config.motion_rnn.encode_history)
print("mlp_layers = ",config.motion_rnn.mlp_layers)
print("rnn_state_size = ",config.motion_rnn.rnn_state_size)
print("rnn_layers = ",config.motion_rnn.rnn_layers)
print("rnn_blocks = ",config.motion_rnn.rnn_blocks)

SlidingRNN_v2(
  (mlp_mini): siMLPe_mini(
    (arr0): Rearrange('b n d -> b d n')
    (arr1): Rearrange('b d n -> b n d')
    (motion_mlp): TransMLP(
      (mlps): Sequential(
        (0): MLPblock(
          (fc0): Temporal_FC(
            (fc): Linear(in_features=12, out_features=12, bias=True)
          )
          (norm0): LN()
        )
      )
    )
    (motion_fc_out): Linear(in_features=99, out_features=66, bias=True)
    (temporal_merge_fc): Linear(in_features=12, out_features=1, bias=True)
  )
  (endecoder): GRU(66, 99, batch_first=True)
  (arr0): Rearrange('b n d -> b d n')
  (fc_encoder): Linear(in_features=66, out_features=99, bias=True)
)

Total count of parameters: 63199
Residual output?  False
Use DCT?  False False
Using recursive residual? True
Using spatial fc before temporal in RNN? True
Temporal layer in RNN: 1
History term window size:  10
Short term window size:  10
Encode history?  True
mlp_layers =  1
rnn_state_size =  99
rnn_layers =  1
rnn_blocks =  1


In [24]:
# Set up everything the training loop needs: model in train mode on the GPU,
# train / eval data loaders, the optimizer, logging, and an optional checkpoint.
model.train().cuda()

# dataset = (T-by-C x_in, N-by-C x_out)
# The training target length is written into config before the dataset is built.
config.motion.h36m_target_length = config.motion.h36m_target_length_train
dataset = H36MDataset(config, 'train', config.data_aug)

# separate into batches (input, target) with size (batch_size,T,C) and (batch_size,N,C)
shuffle = True
sampler = None
dataloader = DataLoader(dataset, batch_size=config.batch_size,
						num_workers=config.num_workers, drop_last=True,
						sampler=sampler, shuffle=shuffle, pin_memory=True)

# Evaluation uses a deep copy of config, so overriding the target length below
# does not change the training config.
eval_config = copy.deepcopy(config)
eval_config.motion.h36m_target_length = eval_config.motion.h36m_target_length_eval
eval_dataset = H36MEval(eval_config, 'test')

shuffle = False
sampler = None
# separate into batches (input, target) with size (batch_size,T=50,K,3) and (batch_size,N=25,K,3)
eval_dataloader = DataLoader(eval_dataset, batch_size=128,
						num_workers=1, drop_last=False,
						sampler=sampler, shuffle=shuffle, pin_memory=True)


# initialize optimizer
# The initial rate is config.cos_lr_max; train_step rewrites it after every step.
optimizer = torch.optim.Adam(model.parameters(),
							 lr=config.cos_lr_max,
							 weight_decay=config.weight_decay)

ensure_dir(config.snapshot_dir)
logger = get_logger(config.log_file, 'train')
# NOTE(review): the recorded output shows this step printing
# "ln: failed to create symbolic link ... File exists" when the link already exists.
link_file(config.log_file, config.link_log_file)

# Record the full configuration in the log for this run.
print_and_log_info(logger, json.dumps(config, indent=4, sort_keys=True))

# continue training from a checkpoint
if config.model_pth is not None:
	state_dict = torch.load(config.model_pth)
	# strict=True: the checkpoint keys must match the model exactly.
	model.load_state_dict(state_dict, strict=True)
	print_and_log_info(logger, "Loading model path from {} ".format(config.model_pth))
	print("Loading model path from {} ".format(config.model_pth))

ln: failed to create symbolic link '/home/gjsk/siMLPe/exps/baseline_h36m/log/log_last.log': File exists


Training

In [None]:
# Main training loop: runs train_step until config.cos_lr_total_iters
# iterations are done, logging averages every config.print_every iterations
# and evaluating every config.save_every iterations.
nb_iter = 0
avg_loss = 0
avg_lr = 0
current_lr = config.cos_lr_max

config.save_every = 2500
config.cos_lr_total_iters = 40000
# Reference error values that each evaluation is compared against.
baseline_results = [23.8,44.4,76.1,88.2,107.4,121.6,131.6,136.6]
our_best_results = [11.2, 25.4, 52.7, 64.3, 83.8, 98.7, 111.0, 119.1]

with tqdm(total=config.cos_lr_total_iters, desc="Training") as pbar:
	while (nb_iter + 1) < config.cos_lr_total_iters:
		for (h36m_motion_input, h36m_motion_target) in dataloader:

			loss, optimizer, current_lr = train_step(h36m_motion_input, h36m_motion_target, model, optimizer, nb_iter, config.cos_lr_total_iters, config.cos_lr_max, config.cos_lr_mid, config.cos_lr_min)
			avg_loss += loss
			avg_lr += current_lr
			# Advance the bar once per iteration; updating only after a full
			# pass over the dataloader left it frozen for a whole epoch.
			pbar.update(1)

			if (nb_iter + 1) % config.print_every ==  0 :
				avg_loss = avg_loss / config.print_every
				avg_lr = avg_lr / config.print_every

				print_and_log_info(logger, "Iter {} Summary: ".format(nb_iter + 1))
				print_and_log_info(logger, f"\t lr: {avg_lr} \t Training loss: {avg_loss}")
				avg_loss = 0
				avg_lr = 0

			if (nb_iter + 1) % config.save_every ==  0 :
				# Only evaluations within the last save_every iterations write a checkpoint.
				if (nb_iter + 1) > config.cos_lr_total_iters - config.save_every - 1:
					torch.save(model.state_dict(), config.snapshot_dir + '/model-iter-' + str(nb_iter + 1) + '.pth')
				model.eval()
				acc_tmp = test(eval_config, model, eval_dataloader)
				print(f'Iteration {nb_iter + 1} results: {", ".join(str(i) for i in acc_tmp)}')
				# Gap to our best recorded results, paired entry by entry.
				print([round(float(acc - best), 2) for acc, best in zip(acc_tmp, our_best_results)])
				acc_log.write(f"{nb_iter + 1}\n{' '.join(str(a) for a in acc_tmp)}\n")
				# Flush so the results survive an interrupted kernel.
				acc_log.flush()
				model.train()

			if (nb_iter + 1) == config.cos_lr_total_iters :
				break
			nb_iter += 1
writer.close()

Training:   0%|          | 0/40000 [00:00<?, ?it/s]

Iteration 2500 results: 16.4, 32.7, 61.6, 73.4, 91.9, 105.3, 116.3, 123.3
[5.2, 7.3, 8.9, 9.1, 8.1, 6.6, 5.3, 4.2]
Iteration 5000 results: 15.0, 31.0, 60.7, 73.3, 93.2, 107.7, 120.4, 128.2
[3.8, 5.6, 8.0, 9.0, 9.4, 9.0, 9.4, 9.1]
Iteration 7500 results: 14.6, 30.5, 60.0, 72.3, 92.2, 107.3, 121.1, 130.0
[3.4, 5.1, 7.3, 8.0, 8.4, 8.6, 10.1, 10.9]
Iteration 10000 results: 14.2, 30.1, 59.8, 72.4, 92.5, 107.4, 120.5, 129.1
[3.0, 4.7, 7.1, 8.1, 8.7, 8.7, 9.5, 10.0]
Iteration 12500 results: 13.9, 29.6, 59.0, 71.4, 91.4, 106.3, 119.5, 128.2
[2.7, 4.2, 6.3, 7.1, 7.6, 7.6, 8.5, 9.1]
Iteration 15000 results: 13.6, 29.3, 58.9, 71.2, 90.8, 105.4, 118.0, 126.1
[2.4, 3.9, 6.2, 6.9, 7.0, 6.7, 7.0, 7.0]
Iteration 17500 results: 13.5, 29.4, 59.7, 72.3, 92.7, 107.9, 121.1, 129.5
[2.3, 4.0, 7.0, 8.0, 8.9, 9.2, 10.1, 10.4]
Iteration 20000 results: 13.3, 28.9, 58.4, 70.7, 90.2, 104.9, 117.8, 126.0
[2.1, 3.5, 5.7, 6.4, 6.4, 6.2, 6.8, 6.9]
Iteration 22500 results: 13.2, 29.0, 58.9, 71.3, 91.1, 105.8, 118.5, 1

Manual test

In [6]:
# Manual test setup: fetch one training batch and build a fresh model so the
# forward pass can be stepped through by hand in the cells below.
from einops.layers.torch import Rearrange
arr0 = Rearrange('b n d -> b d n')
arr1 = Rearrange('b d n -> b n d')

nb_iter = 0
avg_loss = 0
avg_lr = 0

(h36m_motion_input, h36m_motion_target) = next(iter(dataloader))

# loss, optimizer, current_lr = train_step(h36m_motion_input, h36m_motion_target, model, optimizer, nb_iter, config.cos_lr_total_iters, config.cos_lr_max, config.cos_lr_min)
# train_step(h36m_motion_input, h36m_motion_target, model, optimizer, nb_iter, total_iter, max_lr, min_lr)
total_iter, max_lr, min_lr = config.cos_lr_total_iters, config.cos_lr_max, config.cos_lr_min

# DCT: applied only when config.pre_dct is set, exactly as train_step does.
# Unconditional use would need dct_m even in runs where DCT is disabled.
b,n,c = h36m_motion_input.shape
if config.pre_dct:
	h36m_motion_input_ = torch.matmul(dct_m[:, :, :config.motion.h36m_input_length], h36m_motion_input.cuda())
else:
	h36m_motion_input_ = h36m_motion_input.clone()

import model as models
test_model = models.SlidingRNN_v2(config).cuda()

In [None]:
# Step through the encoder part of the model's forward pass by hand, calling
# test_model's sub-modules directly.
# motion_pred = model(h36m_motion_input_.cuda())
x = h36m_motion_input_.cuda()

B, T, C = x.size()
assert(C == test_model.config.motion.dim)

# Encoder: start with zero hidden states
# All input frames except the last one are fed to the recurrent layer.
if test_model.config.motion_rnn.use_gru:
	encoder_out, rnn_states = test_model.endecoder(x[:,:-1,:])
else:
	encoder_out, (rnn_states, cell_states) = test_model.endecoder(x[:,:-1,:])

# Decoder initialization
last_input_frame = x[:, -1:, :]  # Last time step of input as initial input [B, 1, C]
last_rnn_input = last_input_frame.clone()

if test_model.config.motion_rnn.encode_history:
	# This branch referred to `self`, which does not exist at notebook top
	# level; the module being inspected is test_model.
	if test_model.config.motion_rnn.use_gru:
		encoded_history, _ = test_model.endecoder(last_input_frame, rnn_states)
	else:
		encoded_history, (_,_) = test_model.endecoder(last_input_frame, (rnn_states, cell_states))
	# NOTE(review): the model summary printed earlier lists fc_encoder but no
	# fc_decoder; confirm this attribute exists on the current model.
	window_history = test_model.fc_decoder(encoded_history)
else:
	# size = [B, history_window_size, test_model.config.motion_rnn.rnn_state_size]
	window_history = x[:, -test_model.config.motion_rnn.history_window_size:, :]

# output_frames = torch.zeros(B, T, C).cuda()
# for frame_id in range(T):
# 	# Decoder: # [B, 1, C]
# 	if test_model.config.motion_rnn.use_gru:
# 		decoder_out, rnn_states = test_model.endecoder(last_rnn_input, rnn_states)
# 	else:
# 		decoder_out, (rnn_states, cell_states) = test_model.endecoder(last_rnn_input, (rnn_states, cell_states))

# 	# decode [B,1,H] to [B,1,C]
# 	_decoder_out = decoder_out
# 	_decoder_out = test_model.fc_decoder(_decoder_out)

# 	if test_model.config.motion_rnn.short_term_window_size > 1:
# 		# generate short term window
# 		frame_start_id = frame_id-(test_model.config.motion_rnn.short_term_window_size-1)
# 		if frame_start_id < 0:
# 			if frame_id == 0:
# 				window_short_term_minus_one = x[:, frame_start_id:, :]
# 			else:
# 				window_short_term_minus_one = torch.cat([x[:, frame_start_id:, :], output_frames[:,:frame_id,:]], dim=1)
# 		else:
# 			window_short_term_minus_one = output_frames[:, frame_start_id:frame_id, :]

# 		mlp_input = torch.cat([window_history, window_short_term_minus_one, _decoder_out], dim=1)
# 	else:
# 		mlp_input = torch.cat([window_history, _decoder_out], dim=1)
	
# 	if test_model.config.motion_rnn.recursive_residual:
# 		# Residual method 1 (recursive residual; same as in 2017 Martinez paper):
# 		new_frame = test_model.mlp_mini(mlp_input) + last_rnn_input
# 	else:
# 		# Residual method 2 (residual from the last input frame):
# 		new_frame = test_model.mlp_mini(mlp_input) + last_input_frame

# 	output_frames[:, frame_id:frame_id+1, :] = new_frame
# 	# Next input is current output
# 	last_rnn_input = new_frame

IndexError: index 1 is out of bounds for dimension 0 with size 1

In [69]:
import mlp
# Sequence length handed to the replacement block: the DCT input length plus one.
concatenated_dim = config.motion.h36m_input_length_dct+1
# Swap the model's mini MLP for a single MLPblock configured from config.motion_mlp.
test_model.mlp_mini = mlp.MLPblock(
	dim=config.motion_mlp.hidden_dim,
	seq=concatenated_dim,
	use_norm=config.motion_mlp.with_normalization,
	use_spatial_fc=config.motion_mlp.spatial_fc_only,
	layernorm_axis=config.motion_mlp.norm_axis,
)

In [74]:
# Run mlp_input through the mini MLP one sub-module at a time.
mini_mlp = test_model.mlp_mini
motion_feats = mini_mlp.motion_fc_in(mlp_input)
motion_feats = mini_mlp.arr0(motion_feats)

# MLP block input should be [B,C,T]
motion_feats = mini_mlp.motion_mlp(motion_feats)

# Rearrange back before the output projection.
motion_feats = mini_mlp.arr1(motion_feats)
motion_feats = mini_mlp.motion_fc_out(motion_feats)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (16896x60 and 51x51)

In [45]:
# Display the module tree of the mini MLP's motion_mlp to inspect its layer sizes.
test_model.mlp_mini.motion_mlp

TransMLP(
  (mlps): Sequential(
    (0): MLPblock(
      (fc0): Temporal_FC(
        (fc): Linear(in_features=50, out_features=50, bias=True)
      )
      (norm0): LN()
    )
  )
)

In [None]:
# Manual copy of the second half of train_step: post-process the prediction,
# compute the loss, and take one optimizer step.
# NOTE(review): motion_pred is not assigned by any live code in the visible
# manual-test cells (the model call there is commented out); it has to come
# from an earlier run.

# IDCT (only when enabled, as in train_step)
pred = motion_pred
if config.post_dct:
	pred = torch.matmul(idct_m[:, :config.motion.h36m_input_length, :], pred)

# add residual
pred = pred[:, :config.motion.h36m_target_length]
if config.residual_output:
	pred = pred + h36m_motion_input[:, -1:].cuda()

# calc losses
# Work on fresh names so motion_pred and h36m_motion_target keep their
# original shapes and the cell can be re-run.
b,n,c = h36m_motion_target.shape
pred_flat = pred.reshape(b,n,22,3).reshape(-1,3)
target_flat = h36m_motion_target.cuda().reshape(b,n,22,3).reshape(-1,3)
loss = torch.mean(torch.norm(pred_flat - target_flat, 2, 1))
# add position loss and velocity loss
if config.use_relative_loss:
	dmotion_pred = gen_velocity(pred_flat.reshape(b,n,22,3))
	dmotion_gt = gen_velocity(target_flat.reshape(b,n,22,3))
	dloss = torch.mean(torch.norm((dmotion_pred - dmotion_gt).reshape(-1,3), 2, 1))
	loss = loss + dloss

writer.add_scalar('Loss/angle', loss.detach().cpu().numpy(), nb_iter)

# reset gradients
optimizer.zero_grad()
# compute gradients by backpropagation
loss.backward()
# update params
optimizer.step()
# update_lr_multistep takes (nb_iter, total_iter, max_lr, mid_lr, min_lr, optimizer);
# the mid rate is read from config, as in the training loop.
optimizer, current_lr = update_lr_multistep(nb_iter, total_iter, max_lr, config.cos_lr_mid, min_lr, optimizer)
writer.add_scalar('LR/train', current_lr, nb_iter)

# `return` is only valid inside a function; end the cell with the tuple so the
# notebook displays it.
loss.item(), optimizer, current_lr