# Notebook for meta-learning human motion from few random samples from discrete time series

# Meta-learning for regression tasks
1) 1-D regression (sine waves): predict the sine wave that produced the 5 sampled points, i.e. learn a function $f_{\theta}(x) = y$ <br>
![title](sinewavesreg.png)
2) 2-D image completion: given only 10-100 pixels of an image, regress the whole image. Given a pixel location (x, y), regress the RGB values, $f_{\theta}(x, y) = [y_r, y_g, y_b]^T$
![title](imagesreg.png)
3) 3-D human behaviour understanding: potentially learning the complete 3D mesh motion sequence from a few samples. Let M be a sequence of meshes parametrized by shape and pose coefficients, M = [$m(\beta, \theta_1), m(\beta, \theta_2), ..., m(\beta, \theta_T)$], and X = [$x_1, x_2, ..., x_T$] the discrete time series. The purpose is to learn the sequence M from only N << T samples, which means learning how the pose parameters vary over time.
\begin{align}
    Z &= randsample(M, N)\\
    f_{\theta}(Z) &= [ m(\beta, \theta_1), m(\beta, \theta_2),...m(\beta, \theta_T)]
\end{align}

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib notebook
%matplotlib inline

import sys, os
import torch
import torch.nn as nn
import numpy as np
import glob
import learn2learn as l2l
from collections import OrderedDict
from human_body_prior.tools.omni_tools import copy2cpu as c2c
import warnings
from torch import optim
warnings.filterwarnings("ignore")

import trimesh
from human_body_prior.tools.omni_tools import colors
from human_body_prior.mesh import MeshViewer
from human_body_prior.mesh.sphere import points_to_spheres
from notebook_tools import show_image

In [2]:
# Pick the compute device once for the whole notebook: the GPU when PyTorch
# reports one as available, otherwise the CPU.
if torch.cuda.is_available():
    comp_device = torch.device("cuda")
else:
    comp_device = torch.device("cpu")
print(comp_device)

cuda


In [3]:
def loss_meshes(target, predicted_poses, loss_fn, shots, bm):
    """Return the mean absolute vertex error between target and predicted meshes.

    For each of the first ``shots`` frames two meshes are built with ``bm``:
    one from the ground-truth body pose (columns 3:66 of ``target['poses']``)
    and one from ``predicted_poses``. Both use the same ``target['betas']`` and
    that frame's row of ``target['dmpls']``. The per-frame mean of
    ``|v_target - v_predicted|`` is then averaged over the frames.

    Args:
        target: mapping with the keys ``'betas'``, ``'dmpls'`` and ``'poses'``;
            ``'dmpls'`` and ``'poses'`` are indexed per frame along dim 0.
        predicted_poses: tensor indexed per frame along dim 0; row ``i`` is
            passed to ``bm`` as ``pose_body`` (after adding a batch dim).
        loss_fn: unused. Kept in the signature so existing callers keep
            working; the error is always the mean absolute difference.
        shots: number of leading frames to compare.
        bm: callable accepting ``pose_body``, ``betas`` and ``dmpls`` keyword
            arguments and returning an object with a ``.v`` vertex tensor.

    Returns:
        A tensor of shape ``(1,)`` holding the averaged error.
    """
    betas = target['betas']
    dmpls = target['dmpls']
    poses = target['poses']

    # Allocate the accumulator next to the predictions rather than on a
    # notebook-level device global, so the function works wherever its inputs live.
    total = predicted_poses.new_zeros(1)
    for i in range(shots):
        frame_dmpls = dmpls[i].unsqueeze(0)
        # Distinct local names: the original rebound the `target` argument here.
        target_mesh = bm(pose_body=poses[i, 3:66].unsqueeze(0), betas=betas, dmpls=frame_dmpls)
        predicted_mesh = bm(pose_body=predicted_poses[i].unsqueeze(0), betas=betas, dmpls=frame_dmpls)
        total = total + torch.abs(target_mesh.v - predicted_mesh.v).mean()
    return total / shots

def adaptation(x, y, learner, adaptation_steps, shots, loss_fn, bm):
    """Adapt ``learner`` on half of a task and return its error on the other half.

    The samples at even positions ``0, 2, ..., 2*(shots-1)`` of ``x``/``y`` form
    the adaptation set; every remaining position forms the evaluation set.
    ``learner`` is adapted for ``adaptation_steps`` steps on the adaptation set,
    and the mesh loss on the evaluation set is returned for the meta-update.

    Args:
        x: tensor of inputs, indexed per sample along dim 0. The caller in this
            notebook passes ``2 * shots`` samples, so both halves hold
            ``shots`` samples.
        y: mapping with per-sample tensors under ``'poses'``, ``'trans'`` and
            ``'dmpls'``, plus ``'betas'``, which is shared by both halves.
        learner: callable on input data that also exposes ``adapt(loss)``
            (the caller passes ``maml.clone()``).
        adaptation_steps: number of inner-loop adaptation steps.
        shots: number of samples in the adaptation set; also the number of
            evaluation samples that are scored.
        loss_fn: forwarded to ``loss_meshes``.
        bm: body model forwarded to ``loss_meshes``.

    Returns:
        The evaluation-set loss as returned by ``loss_meshes``.
    """
    # Boolean mask instead of a uint8 mask with `1 - mask`: byte-tensor
    # indexing is deprecated in PyTorch and `~` is the supported complement.
    adaptation_mask = torch.zeros(x.size(0), dtype=torch.bool)
    adaptation_mask[torch.arange(shots) * 2] = True
    evaluation_mask = ~adaptation_mask

    adaptation_data, evaluation_data = x[adaptation_mask], x[evaluation_mask]

    # 'betas' is not per-sample, so both label sets share the same entry.
    adaptation_labels = {'betas': y['betas']}
    evaluation_labels = {'betas': y['betas']}
    for key in ('poses', 'trans', 'dmpls'):
        adaptation_labels[key] = y[key][adaptation_mask]
        evaluation_labels[key] = y[key][evaluation_mask]

    # Inner loop: adapt the cloned learner on the adaptation set.
    for _ in range(adaptation_steps):
        train_error = loss_meshes(adaptation_labels, learner(adaptation_data), loss_fn, shots, bm)
        learner.adapt(train_error)

    # Error of the adapted learner on held-out samples, used for the meta-update.
    predicted_poses = learner(evaluation_data)
    valid_error = loss_meshes(evaluation_labels, predicted_poses, loss_fn, shots, bm)
    return valid_error

# Load data, Body model used SMPLH (body, hands and dmpls)

In [4]:
from human_body_prior.body_model.body_model import BodyModel

# Sizes of the shape (betas) and soft-tissue (DMPL) coefficient vectors.
num_betas = 10 # number of body parameters
num_dmpls = 8 # number of DMPL parameters

# Machine-specific absolute paths: body model, DMPL model, and the folder
# holding the motion sequences (*.npz) used throughout the notebook.
bm_path = '/home/michalislazarou/PhD/amass/smplh/male/model.npz'
dmpl_path = '/home/michalislazarou/PhD/amass/dmpl/male/model.npz'
npz_bdata_path ='/home/michalislazarou/PhD/amass/datasets/EKUT/234/'

# Build the body model and move it to the compute device.
bm = BodyModel(
    bm_path=bm_path,
    num_betas=num_betas,
    num_dmpls=num_dmpls,
    path_dmpl=dmpl_path,
)
bm = bm.to(comp_device)
print(
    "Body params: ", bm.pose_body.shape,
    "hand params: ", bm.pose_hand.shape,
    "dmpl: ", bm.dmpls.shape,
)
# Face indices copied to the CPU, reused by every trimesh built below.
faces = c2c(bm.f)

Body params:  torch.Size([1, 63]) hand params:  torch.Size([1, 90]) dmpl:  torch.Size([1, 8])


# Create a class to sample human motion examples from a dataset; in this notebook we use the EKUT dataset (KIT Whole-Body Human Motion Database)

In [5]:
class HumanMotionDataset():
    """Task sampler over a folder of motion sequences stored as ``*.npz`` files.

    Each file is loaded as a dict; its ``'poses'``, ``'trans'`` and ``'dmpls'``
    arrays are truncated to the first ``max_frames`` rows. The first
    ``num_train`` sequences (in sorted file-name order) form the training pool
    and the rest the test pool. A "task" is one sequence; ``get_samples``
    draws ``shots`` random frames from the current task.

    Args:
        path_to_data: directory that contains the ``*.npz`` sequence files.
        shots: number of frames drawn by each ``get_samples`` call.
        max_frames: number of leading frames kept per sequence (default 200).
        num_train: number of sequences assigned to the training pool
            (default 70).
    """

    def __init__(self, path_to_data, shots, max_frames=200, num_train=70):
        self.img_path = path_to_data
        self.shots = shots
        # Set before get_labels(), which reads both values.
        self.max_frames = max_frames
        self.num_train = num_train
        self.train_meshes, self.test_meshes = self.get_labels()
        # Currently selected sequence; None until sample_task() is called.
        self.mesh = None

    def sample_task(self, train='train'):
        """Select a random sequence as the current task.

        Args:
            train: ``'train'`` draws from the training pool; any other value
                draws from the test pool.

        Raises:
            ValueError: if the selected pool is empty.
        """
        pool = self.train_meshes if train == 'train' else self.test_meshes
        if not pool:
            raise ValueError("no sequences available in the requested pool")
        index = np.random.randint(len(pool))
        self.mesh = pool[index]

    def get_samples(self):
        """Draw ``shots`` distinct frames from the current task.

        Returns:
            ``(x, y)`` where ``x`` is the sorted array of sampled frame indices
            and ``y`` is a dict with the matching rows of ``'poses'``,
            ``'trans'`` and ``'dmpls'`` plus the sequence's ``'betas'``.

        Raises:
            RuntimeError: if no task has been selected with ``sample_task``.
        """
        if self.mesh is None:
            raise RuntimeError("no task selected: call sample_task() before get_samples()")
        x = np.sort(np.random.choice(self.mesh['poses'].shape[0], self.shots, replace=False))
        y = {key: self.mesh[key][x, :] for key in ('poses', 'trans', 'dmpls')}
        y['betas'] = self.mesh['betas']
        return x, y

    def get_labels(self):
        """Load every sequence and split the list into (train, test) pools."""
        sequences = []
        # sorted(): glob returns files in filesystem order, which would make
        # the train/test split differ between machines and runs.
        for data_file in sorted(glob.glob(os.path.join(self.img_path, '*.npz'))):
            # Context manager closes the archive once the arrays are in memory.
            with np.load(data_file) as archive:
                bdata = dict(archive)
            for key in ('poses', 'trans', 'dmpls'):
                bdata[key] = bdata[key][:self.max_frames, :]
            sequences.append(bdata)
        return sequences[:self.num_train], sequences[self.num_train:]

# In a similar way to the regression problem in sinewaves and image completion in: 
MAML:https://arxiv.org/pdf/1703.03400.pdf <br>
CAVIA:https://arxiv.org/pdf/1810.03642.pdf<br>
CNP: https://arxiv.org/pdf/1807.01613.pdf<br>
a regressor network is used as our meta-learner, with the aim of regressing the complete human motion from only a few samples of it. This can also be viewed as interpolation of motion.

In [6]:
class RegressorNet(nn.Module):
    """Fully connected regressor from a 1-D input to a 63-D output.

    The network is four ``Linear -> ReLU`` stages of width 128 (named
    ``l1``/``relu1`` ... ``l4``/``relu4``) followed by a final ``Linear(128, 63)``
    named ``l5``, all held in ``self.net``.
    """

    def __init__(self):
        super().__init__()
        stages = OrderedDict()
        width_in = 1
        # Hidden stages are created in order l1..l4 so layer names (and the
        # order in which weights are initialised) stay the same.
        for idx in range(1, 5):
            stages['l%d' % idx] = nn.Linear(width_in, 128)
            stages['relu%d' % idx] = nn.ReLU()
            width_in = 128
        stages['l5'] = nn.Linear(width_in, 63)
        self.net = nn.Sequential(stages)

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        out = self.net(x)
        return out

# Load human motion dataset for single mesh example 

In [55]:
# import meta-dataset
# Load every sequence in the data folder, keep the first 200 frames of the
# per-frame arrays, and split the list 70 / rest into train and test.
data_list = []
# sorted(): glob returns files in filesystem order, which would make both the
# split and fixed indices such as data_list[25] differ between machines.
for data_file in sorted(glob.glob(os.path.join(npz_bdata_path, '*.npz'))):
    # Context manager closes the archive once its arrays are copied into a dict.
    with np.load(data_file) as npz_file:
        bdata = dict(npz_file)
    for key in ('poses', 'trans', 'dmpls'):
        bdata[key] = bdata[key][:200, :]
    data_list.append(bdata)
train_list, test_list = data_list[:70], data_list[70:]
# Shape of the shape-coefficient vector of the last loaded sequence.
print(bdata['betas'].shape)
print(len(data_list), len(train_list), len(test_list))

(16,)
79 70 9


# Meta-learning hyperparameters initialized 

In [56]:
# Learning rates: alpha is used for the single-task Adam optimiser and as the
# MAML inner-loop rate; beta is used for the meta-optimiser.
alpha, beta = 0.01, 0.001
# Frames sampled per task and inner-loop steps per task.
shots, adaptation_steps = 50, 50
# Tasks accumulated per meta-update.
meta_batch_size = 10
# NOTE(review): not referenced by any loop visible in this notebook.
num_iterations = 20000
# Regressor used for the single-mesh and single-sequence experiments.
regressor = RegressorNet().to(comp_device)

# Single mesh regression:
Target mesh shown<br>
Note that only the pose parameters of the body are regressed, no pose parameters for hands or shape parameters

In [76]:
bdata = data_list[25]

fId = 71 # frame id of the mocap sequence

# A length-1 slice keeps the leading batch dimension on every per-frame array.
_frame = slice(fId, fId + 1)
_frame_poses = bdata['poses'][_frame]

root_orient = torch.Tensor(_frame_poses[:, :3]).to(comp_device) # controls the global root orientation
pose_body = torch.Tensor(_frame_poses[:, 3:66]).to(comp_device) # controls the body
pose_hand = torch.Tensor(_frame_poses[:, 66:]).to(comp_device) # controls the finger articulation
betas = torch.Tensor(bdata['betas'][:10][np.newaxis]).to(comp_device) # controls the body shape
dmpls = torch.Tensor(bdata['dmpls'][_frame]).to(comp_device) # controls soft tissue dynamics

# Only body pose, shape and DMPLs are passed to the body model here;
# root_orient and pose_hand are extracted but not used for the target mesh.
body = bm(pose_body=pose_body, betas=betas, dmpls=dmpls)
_white = np.tile(colors['white'], (6890, 1))
target = trimesh.Trimesh(vertices=c2c(body.v[0]), faces=faces, vertex_colors=_white)
target.show()

# Initial starting mesh (depends on the weights of the regressor net). It can be easily initialized to T pose or another mesh.

In [10]:
# Show the mesh produced by the (still untrained) regressor for frame fId.
# This cell also creates `opt`, `x` and `loss`, which later cells reuse.

#meta learnable parameters
# All-zero (1, 63) body pose created as a leaf tensor; it is rebound to the
# regressor output further down, so it only feeds the first mesh below.
body_pose = torch.tensor(np.zeros((1, 63)).astype('float32'), requires_grad = True, device = comp_device)
#hand_pose = torch.tensor(np.zeros((1, 90)).astype('float32'), requires_grad = True, device = device)
#dmpls = torch.tensor(np.zeros((1, 8)).astype('float32'), requires_grad = True, device = device)
#maml = l2l.algorithms.MAML(body_pose, lr=alpha, first_order=False)
# NOTE: `loss` is rebound to a loss tensor by the training loop that follows.
loss=nn.MSELoss()
# Adam over the regressor's parameters, with alpha as the learning rate.
opt = optim.Adam(regressor.parameters(), alpha)
# Mesh for the all-zero pose (its display call is commented out).
mesh = bm(pose_body=body_pose, betas=betas, dmpls=dmpls)
body_mesh = trimesh.Trimesh(vertices=c2c(mesh.v[0]), faces=faces, vertex_colors=np.tile(colors['white'], (6890, 1)))
#body_mesh.show()
# Regressor input: the frame id as a float tensor of shape (1,).
x=torch.tensor(fId, dtype=torch.float).unsqueeze(0).to(comp_device)
# Regressor output with a leading batch dimension added, used as the body pose.
body_pose = regressor(x).unsqueeze(0)
mesh = bm(pose_body=body_pose, betas=betas, dmpls=dmpls)
body_mesh = trimesh.Trimesh(vertices=c2c(mesh.v[0]), faces=faces, vertex_colors=np.tile(colors['white'], (6890, 1)))
body_mesh.show()

# Learn the regressor parameters to predict the pose coefficients for the target mesh, iter=5000

In [11]:
# Fit the regressor so that the mesh built from its output for input `x`
# matches the target mesh `body`, using the mean absolute vertex difference.

# The target vertices are constants for this fit. Detaching them means each
# backward pass only walks the graph built in the current iteration, so
# retain_graph=True is no longer needed.
target_vertices = body.v.detach()

for i in range(5000):
    opt.zero_grad()

    body_pose = regressor(x).unsqueeze(0)
    mesh = bm(pose_body=body_pose, betas=betas, dmpls=dmpls)

    loss = torch.abs(target_vertices - mesh.v).mean()
    loss.backward()
    opt.step()

    if i % 200 == 0:
        print("iteration: ", i, "loss: ", loss.item())

# Mesh from the last iteration's prediction.
body_mesh = trimesh.Trimesh(vertices=c2c(mesh.v[0]), faces=faces, vertex_colors=np.tile(colors['white'], (6890, 1)))
print(body_mesh.center_mass)

iteration:  0 loss:  0.2315455675125122
iteration:  200 loss:  0.0063395933248102665
iteration:  400 loss:  0.003187261987477541
iteration:  600 loss:  0.0012746956199407578
iteration:  800 loss:  0.0023992510978132486
iteration:  1000 loss:  0.002410980872809887
iteration:  1200 loss:  0.0017530760960653424
iteration:  1400 loss:  0.0023826193064451218
iteration:  1600 loss:  0.0013512757141143084
iteration:  1800 loss:  0.001835006638430059
iteration:  2000 loss:  0.0016843578778207302
iteration:  2200 loss:  0.0014557491522282362
iteration:  2400 loss:  0.0008799847564660013
iteration:  2600 loss:  0.001915905624628067
iteration:  2800 loss:  0.0015892168739810586
iteration:  3000 loss:  0.0016364488983526826
iteration:  3200 loss:  0.0010656730737537146
iteration:  3400 loss:  0.0010100556537508965
iteration:  3600 loss:  0.0009081707103177905
iteration:  3800 loss:  0.001026316313073039
iteration:  4000 loss:  0.0012113482225686312
iteration:  4200 loss:  0.0013003258500248194
ite

# Regressor network mesh prediction

In [34]:
# Build a white mesh from the first batch element of the current `mesh`
# (the regressor's prediction) and display it. The colour array has 6890 rows,
# presumably one per mesh vertex.
body_mesh = trimesh.Trimesh(vertices=c2c(mesh.v[0]), faces=faces, vertex_colors=np.tile(colors['white'], (6890, 1)))
body_mesh.show()

IndexError: index 1 is out of bounds for dimension 0 with size 1

# Target mesh shown

In [13]:
# Display the target mesh built earlier, for visual comparison with the prediction.
target.show()

# Regress 50 meshes as a time series signal with the same regressor network.
This can be thought of as learning a parametrized function H from time, t, to mesh, M.<br>

$$H_{\phi}(t_i) \rightarrow M_{t_i}$$

In [14]:
# Pick one training sequence and draw `shots` random frames from it.
human_motion = HumanMotionDataset(npz_bdata_path, shots)
human_motion.sample_task()
x, y = human_motion.get_samples()

# Frame indices become a float column (one row per sampled frame), which is
# the regressor's input.
x = torch.tensor(x, dtype=torch.float).unsqueeze(1).to(comp_device)

#convert to pytorch tensors
# Keep the first 10 shape coefficients and add a leading batch dimension.
y['betas'] = y['betas'][:10][np.newaxis] # controls the body shape
for _key in ('betas', 'trans', 'poses', 'dmpls'):
    y[_key] = torch.Tensor(y[_key]).to(comp_device)

for _key in ('betas', 'poses', 'dmpls'):
    print(y[_key].shape)

torch.Size([1, 10])
torch.Size([50, 156])
torch.Size([50, 8])


In [15]:
# Fit the single regressor to all sampled frames of the sequence at once:
# it maps each frame index in `x` to a body pose, and loss_meshes compares the
# resulting meshes against the targets in `y`.
for i in range(50000):
    opt.zero_grad()

    body_pose = regressor(x)
    loss = loss_meshes(y, body_pose, nn.MSELoss(), shots, bm)

    loss.backward()
    opt.step()

    # Report progress every 5000 iterations only.
    if i % 5000 != 0:
        continue
    print("iteration: ", i, "loss: ", loss.item())

iteration:  0 loss:  0.03634312003850937
iteration:  5000 loss:  0.024450629949569702
iteration:  10000 loss:  0.02453804947435856
iteration:  15000 loss:  0.02445356361567974
iteration:  20000 loss:  0.024568883702158928
iteration:  25000 loss:  0.02450978010892868
iteration:  30000 loss:  0.024461500346660614
iteration:  35000 loss:  0.024505579844117165
iteration:  40000 loss:  0.024472976103425026
iteration:  45000 loss:  0.02445121295750141


# View a target mesh example

In [75]:
# Build and display the ground-truth mesh for sampled frame i of the sequence.
i = 16
print(x[i])
t = bm(pose_body=y['poses'][i,3:66].unsqueeze(0), betas=y['betas'], dmpls=y['dmpls'][i].unsqueeze(0))
target_mesh = trimesh.Trimesh(vertices=c2c(t.v[0]), faces=faces, vertex_colors=np.tile(colors['white'], (6890, 1)))
# Show the mesh that was just built; the original displayed the stale
# `body_mesh` from an earlier cell instead.
target_mesh.show()

tensor([71.], device='cuda:0')


# Meta-learning on a distribution of human motion data



In [None]:
# Meta-learning setup: a fresh regressor wrapped by MAML, an Adam
# meta-optimiser over the MAML parameters, and a task sampler.
regressor_meta = RegressorNet()
regressor_meta = regressor_meta.to(comp_device)
# Wrap the regressor created above; the original passed the undefined name `metal`.
maml = l2l.algorithms.maml.MAML(regressor_meta, lr=alpha, first_order=False)
opt_meta = optim.Adam(maml.parameters(), beta)
loss_fn = nn.MSELoss()
# 2 * shots frames per task: `adaptation` uses `shots` of them for adaptation
# and the remaining `shots` for evaluation.
human_distribution = HumanMotionDataset(npz_bdata_path, shots*2)

# Meta-training phase 

In [None]:
# Meta-training: each iteration accumulates the post-adaptation evaluation
# loss of `meta_batch_size` tasks and takes one meta-optimiser step.
# NOTE(review): the iteration count is hard-coded; `num_iterations` from the
# hyperparameter cell is not used here - confirm which value is intended.
for iteration in range(50000):
    opt_meta.zero_grad()
    meta_train_error = 0.0
    for task in range(meta_batch_size):
        learner = maml.clone()
        # Select a sequence before sampling frames: get_samples() reads the
        # current task, which is unset until sample_task() has been called.
        human_distribution.sample_task()
        x, y = human_distribution.get_samples()
        x = torch.tensor(x, dtype=torch.float).unsqueeze(1).to(comp_device)
        y['betas'] = torch.Tensor(y['betas'][:10][np.newaxis]).to(comp_device) # controls the body shape
        y['trans'] = torch.Tensor(y['trans']).to(comp_device)
        y['poses'] = torch.Tensor(y['poses']).to(comp_device)
        y['dmpls'] = torch.Tensor(y['dmpls']).to(comp_device)
        evaluation_error = adaptation(x, y, learner, adaptation_steps, shots, loss_fn, bm)

        # Gradients of every task accumulate into the MAML parameters.
        evaluation_error.backward()
        meta_train_error += evaluation_error.item()

    # Average the accumulated gradients over the tasks of this meta-batch.
    for p in maml.parameters():
        if p.grad is not None:
            p.grad.data.mul_(1.0 / meta_batch_size)
    # Step the meta-optimiser; the original stepped `opt`, which optimises the
    # separate single-task `regressor`, so the meta-parameters never changed.
    opt_meta.step()

    if iteration % 5000 == 0:
        print('Iteration', iteration)
        print('Meta Train Error', meta_train_error / meta_batch_size)