In [1]:
import os
import json
from pathlib import Path

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from core.log import Logger, save_fig, save_config
from core.data.generators import PoseGenerator
from core.models import Encoder, Decoder, Seq2Seq
from core.utils import save_ckpt
from core.data.data_utils import fetch, read_3d_data, create_2d_data

from pose2motion_arguments import parse_args
from pose2motion_utils import train, evaluate

In [2]:
import argparse
with open('config.json') as f:
    config = argparse.Namespace()
    args = json.load(f)
    d = vars(config)
    for k, v in args.items():
        d[k] = v
d['batch_size'] = 128
print(d)

# os setting
os.environ['OMP_NUM_THREAD'] = '1'
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

# a simple work-around for the dataloader multithread bug
torch.set_num_threads(1)

{'exp_name': 'FinalTest-Original', 'dataset': 'h36m', 'keypoint_source': 'gt', 'actions': '*', 'past': 8, 'future': 16, 'time_stride': 1, 'window_stride': 4, 'batch_size': 128, 'num_workers': 4, 'encoder_ipt_dim': 32, 'encoder_opt_dim': 45, 'decoder_ipt_dim': 45, 'decoder_opt_dim': 45, 'num_recurrent_layers': 2, 'bidirectional': True, 'hid_dim': 256, 'dropout': 0.0, 'visible_devices': '0', 'device': 'cuda', 'lr': 0.002, 'lr_decay': 10000, 'lr_gamma': 0.9, 'start_epoch': 0, 'epochs': 60}


In [3]:
device = config.device
print('==> Using settings {}'.format(config))

print('==> Loading dataset...', config.dataset)
if config.dataset == 'h36m':
    DATASET_NAME = config.dataset.lower()
    from core.data.h36m_dataset import Human36mDataset, TRAIN_SUBJECTS, TEST_SUBJECTS

    dataset_path = Path('data', DATASET_NAME, f'data_3d_{DATASET_NAME}.npz')
    dataset = Human36mDataset(dataset_path)
    subjects_train = TRAIN_SUBJECTS
    subjects_test = TEST_SUBJECTS
else:
    raise KeyError('Invalid dataset')

print('==> Preparing data...')
dataset = read_3d_data(dataset)
keypoints_path = Path('data', DATASET_NAME, f'data_2d_{DATASET_NAME}_{config.keypoint_source}.npz')
keypoints = create_2d_data(keypoints_path, dataset)

print('==> Initializing dataloaders...')
action_filter = None if config.actions == '*' else config.actions.split(',')
# pose_2d_past_segments, pose_3d_past_segments, pose_3d_future_segments, pose_actions = data
data = fetch(subjects_train, dataset, keypoints,
             past=config.past, future=config.future, action_filter=action_filter,
             window_stride=config.window_stride, time_stride=config.time_stride)
train_loader = DataLoader(PoseGenerator(*data),
                          batch_size=config.batch_size, shuffle=True, num_workers=config.num_workers)

data = fetch(subjects_test, dataset, keypoints,
             past=config.past, future=config.future, action_filter=action_filter,
             window_stride=None, time_stride=config.time_stride)
valid_loader = DataLoader(PoseGenerator(*data),
                          batch_size=config.batch_size * 4, shuffle=False, num_workers=config.num_workers)
print('Done!')

==> Using settings Namespace(actions='*', batch_size=128, bidirectional=True, dataset='h36m', decoder_ipt_dim=45, decoder_opt_dim=45, device='cuda', dropout=0.0, encoder_ipt_dim=32, encoder_opt_dim=45, epochs=60, exp_name='FinalTest-Original', future=16, hid_dim=256, keypoint_source='gt', lr=0.002, lr_decay=10000, lr_gamma=0.9, num_recurrent_layers=2, num_workers=4, past=8, start_epoch=0, time_stride=1, visible_devices='0', window_stride=4)
==> Loading dataset... h36m
==> Preparing data...
==> Initializing dataloaders...
==> Fetching subject: S1
==> Fetching subject: S5
==> Fetching subject: S6
==> Fetching subject: S7
==> Fetching subject: S8
Generating 386568 pose sequences...
==> Fetching subject: S9
==> Fetching subject: S11
Generating 67316 pose sequences...
Done!


In [4]:
ckpt = 'ckpt_best.pth.tar'
encoder = Encoder(config.encoder_ipt_dim, config.encoder_opt_dim,
                  hid_dim=config.hid_dim, n_layers=config.num_recurrent_layers,
                  bidirectional=config.bidirectional, dropout_ratio=config.dropout)
decoder = Decoder(config.decoder_ipt_dim, config.decoder_opt_dim,
                  hid_dim=config.hid_dim, n_layers=config.num_recurrent_layers,
                  bidirectional=config.bidirectional, dropout_ratio=config.dropout)
model_pos = Seq2Seq(encoder, decoder).to(device)
model_pos.load_state_dict(torch.load(ckpt)['state_dict'])

<All keys matched successfully>

In [5]:
def acc(predicted, labels):
    return (predicted == labels).sum().item() / predicted.size(0)

n_epoch = 10

In [6]:
class Net(nn.Module):   
    def __init__(self):
        super(Net, self).__init__()

        self.cnn_layers = nn.Sequential(
            # Defining a 2D convolution layer
            nn.Conv2d(3, 16, kernel_size=5, stride=2, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            # Defining another 2D convolution layer
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
        )

        self.linear_layers = nn.Sequential(
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Linear(256, 15)
        )

    # Defining the forward pass    
    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x
    
model_act = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model_act.parameters(), lr=config.lr / 10)
lr=config.lr / 10
from core.utils import AverageMeter, lr_decay
for epoch in range(n_epoch):
    for i, data in enumerate(train_loader):
        loss_act = AverageMeter()
        acc_act = AverageMeter()
        
        gamma = 0.9
        lr = lr * gamma ** epoch
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        # Measure data loading time
        pose_2d = data['pose_2d']
        act_type = data['action_type'].to(device)
        motion_gt = data['future_pose_3d']
        batch, seq_len = pose_2d.size()[:2]
        _, future_seq_len = motion_gt.size()[:2]
        opt_dim = model_pos.encoder.opt_dim

        optimizer.zero_grad()
        with torch.no_grad():
            # reshape, for 3d poses and motion, ignore the hip joint
            pose_2d = pose_2d.to(device).view(batch, seq_len, -1)
            motion_gt = motion_gt[:, :, 1:, :].to(device).view(batch, future_seq_len, -1)
            pred = model_pos(pose_2d, motion_gt)
            pred_pose_3d = pred['past_pose']
            pred_motion_3d = pred['future_motion']
            pose = pred_pose_3d.view(batch, seq_len, 15, 3).permute(0, 3, 2, 1)
            motion = pred_motion_3d.view(batch, future_seq_len, 15, 3).permute(0, 3, 2, 1)
        act_prediction = model_act(pose)
        loss = criterion(act_prediction, act_type)
        loss.backward()
        optimizer.step()
    
        loss_act.update(loss.item())
        acc_act.update(acc(act_prediction.max(1)[1], act_type))

        if i % 300 == 0:
            print(f'ep: {epoch:3d} |iter: {i:4d} | loss: {loss_act.avg:.4f} | acc: {acc_act.avg:.3f}')
            
    # validation
    model_act.eval()
    with torch.no_grad():
        vloss_act = AverageMeter()
        vacc_act = AverageMeter()
        for i, data in enumerate(valid_loader):
            loss_act = AverageMeter()
            acc_act = AverageMeter()

            # Measure data loading time
            pose_2d = data['pose_2d']
            act_type = data['action_type'].to(device)
            motion_gt = data['future_pose_3d']
            batch, seq_len = pose_2d.size()[:2]
            _, future_seq_len = motion_gt.size()[:2]
            opt_dim = model_pos.encoder.opt_dim

            pose_2d = pose_2d.to(device).view(batch, seq_len, -1)
            motion_gt = motion_gt[:, :, 1:, :].to(device).view(batch, future_seq_len, -1)
            pred = model_pos(pose_2d, motion_gt)
            pred_pose_3d = pred['past_pose']
            pred_motion_3d = pred['future_motion']
            pose = pred_pose_3d.view(batch, seq_len, 15, 3).permute(0, 3, 2, 1)
            motion = pred_motion_3d.view(batch, future_seq_len, 15, 3).permute(0, 3, 2, 1)
            act_prediction = model_act(pose)
            vacc_act.update(acc(act_prediction.max(1)[1], act_type))
        print(f'ep: {epoch:3d} |iter: {0:4d} | loss: {0.0:.4f} | acc: {vacc_act.avg:.3f}\n')
        model_act.train()

ep:   0 |iter:    0 | loss: 2.6873 | acc: 0.055
ep:   0 |iter:  300 | loss: 1.5925 | acc: 0.484
ep:   0 |iter:  600 | loss: 1.3736 | acc: 0.539
ep:   0 |iter:  900 | loss: 1.3814 | acc: 0.555
ep:   0 |iter: 1200 | loss: 1.4008 | acc: 0.516
ep:   0 |iter: 1500 | loss: 1.2479 | acc: 0.625
ep:   0 |iter: 1800 | loss: 1.0414 | acc: 0.680
ep:   0 |iter: 2100 | loss: 1.0350 | acc: 0.625
ep:   0 |iter: 2400 | loss: 1.0922 | acc: 0.672
ep:   0 |iter: 2700 | loss: 0.8604 | acc: 0.742
ep:   0 |iter: 3000 | loss: 0.9393 | acc: 0.688
ep:   0 |iter:    0 | loss: 0.0000 | acc: 0.439

ep:   1 |iter:    0 | loss: 0.9723 | acc: 0.695
ep:   1 |iter:  300 | loss: 0.9202 | acc: 0.656
ep:   1 |iter:  600 | loss: 0.7841 | acc: 0.773
ep:   1 |iter:  900 | loss: 1.1049 | acc: 0.633
ep:   1 |iter: 1200 | loss: 0.9326 | acc: 0.711
ep:   1 |iter: 1500 | loss: 0.8259 | acc: 0.766
ep:   1 |iter: 1800 | loss: 0.9304 | acc: 0.688
ep:   1 |iter: 2100 | loss: 0.7925 | acc: 0.734
ep:   1 |iter: 2400 | loss: 0.8822 | ac

In [7]:
class Net(nn.Module):   
    def __init__(self):
        super(Net, self).__init__()

        self.cnn_layers = nn.Sequential(
            # Defining a 2D convolution layer
            nn.Conv2d(3, 16, kernel_size=5, stride=2, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            # Defining another 2D convolution layer
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
        )

        self.linear_layers = nn.Sequential(
            nn.Linear(2048, 256),
            nn.ReLU(),
            nn.Linear(256, 15)
        )

    # Defining the forward pass    
    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x
    
model_act = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model_act.parameters(), lr=config.lr / 10)
lr=config.lr / 10
from core.utils import AverageMeter, lr_decay
for epoch in range(n_epoch):
    for i, data in enumerate(train_loader):
        loss_act = AverageMeter()
        acc_act = AverageMeter()

        gamma = 0.9
        lr = lr * gamma ** epoch
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
            
        # Measure data loading time
        pose_2d = data['pose_2d']
        act_type = data['action_type'].to(device)
        motion_gt = data['future_pose_3d']
        batch, seq_len = pose_2d.size()[:2]
        _, future_seq_len = motion_gt.size()[:2]
        opt_dim = model_pos.encoder.opt_dim

        optimizer.zero_grad()
        with torch.no_grad():
            # reshape, for 3d poses and motion, ignore the hip joint
            pose_2d = pose_2d.to(device).view(batch, seq_len, -1)
            motion_gt = motion_gt[:, :, 1:, :].to(device).view(batch, future_seq_len, -1)
            pred = model_pos(pose_2d, motion_gt)
            pred_pose_3d = pred['past_pose']
            pred_motion_3d = pred['future_motion']
            pose = pred_pose_3d.view(batch, seq_len, 15, 3).permute(0, 3, 2, 1)
            motion = pred_motion_3d.view(batch, future_seq_len, 15, 3).permute(0, 3, 2, 1)
        act_prediction = model_act(motion)
        loss = criterion(act_prediction, act_type)
        loss.backward()
        optimizer.step()
    
        loss_act.update(loss.item())
        acc_act.update(acc(act_prediction.max(1)[1], act_type))

        if i % 300 == 0:
            print(f'ep: {epoch:3d} |iter: {i:4d} | loss: {loss_act.avg:.4f} | acc: {acc_act.avg:.3f}')
            
    # validation
    model_act.eval()
    with torch.no_grad():
        vloss_act = AverageMeter()
        vacc_act = AverageMeter()
        for i, data in enumerate(valid_loader):
            loss_act = AverageMeter()
            acc_act = AverageMeter()

            # Measure data loading time
            pose_2d = data['pose_2d']
            act_type = data['action_type'].to(device)
            motion_gt = data['future_pose_3d']
            batch, seq_len = pose_2d.size()[:2]
            _, future_seq_len = motion_gt.size()[:2]
            opt_dim = model_pos.encoder.opt_dim

            pose_2d = pose_2d.to(device).view(batch, seq_len, -1)
            motion_gt = motion_gt[:, :, 1:, :].to(device).view(batch, future_seq_len, -1)
            pred = model_pos(pose_2d, motion_gt)
            pred_pose_3d = pred['past_pose']
            pred_motion_3d = pred['future_motion']
            pose = pred_pose_3d.view(batch, seq_len, 15, 3).permute(0, 3, 2, 1)
            motion = pred_motion_3d.view(batch, future_seq_len, 15, 3).permute(0, 3, 2, 1)
            act_prediction = model_act(motion)
            vacc_act.update(acc(act_prediction.max(1)[1], act_type))
        print(f'ep: {epoch:3d} |iter: {0:4d} | loss: {0.0:.4f} | acc: {vacc_act.avg:.3f}\n')
        model_act.train()

ep:   0 |iter:    0 | loss: 2.7005 | acc: 0.109
ep:   0 |iter:  300 | loss: 1.5574 | acc: 0.453
ep:   0 |iter:  600 | loss: 1.3354 | acc: 0.594
ep:   0 |iter:  900 | loss: 1.0934 | acc: 0.648
ep:   0 |iter: 1200 | loss: 0.9302 | acc: 0.648
ep:   0 |iter: 1500 | loss: 1.1224 | acc: 0.633
ep:   0 |iter: 1800 | loss: 0.9864 | acc: 0.656
ep:   0 |iter: 2100 | loss: 0.9190 | acc: 0.734
ep:   0 |iter: 2400 | loss: 0.9786 | acc: 0.672
ep:   0 |iter: 2700 | loss: 0.7907 | acc: 0.727
ep:   0 |iter: 3000 | loss: 0.9159 | acc: 0.727
ep:   0 |iter:    0 | loss: 0.0000 | acc: 0.493

ep:   1 |iter:    0 | loss: 0.8410 | acc: 0.703
ep:   1 |iter:  300 | loss: 0.7759 | acc: 0.781
ep:   1 |iter:  600 | loss: 0.8116 | acc: 0.805
ep:   1 |iter:  900 | loss: 0.8390 | acc: 0.719
ep:   1 |iter: 1200 | loss: 0.8157 | acc: 0.789
ep:   1 |iter: 1500 | loss: 0.8062 | acc: 0.750
ep:   1 |iter: 1800 | loss: 0.9750 | acc: 0.672
ep:   1 |iter: 2100 | loss: 0.7824 | acc: 0.742
ep:   1 |iter: 2400 | loss: 0.8079 | ac

In [8]:
class Net(nn.Module):   
    def __init__(self):
        super(Net, self).__init__()

        self.cnn_layers = nn.Sequential(
            # Defining a 2D convolution layer
            nn.Conv2d(3, 16, kernel_size=5, stride=2, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            # Defining another 2D convolution layer
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
        )

        self.linear_layers = nn.Sequential(
            nn.Linear(1024 + 2048, 256),
            nn.ReLU(),
            nn.Linear(256, 15)
        )

    # Defining the forward pass    
    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x
    
model_act = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model_act.parameters(), lr=config.lr / 10)
lr=config.lr / 10
from core.utils import AverageMeter, lr_decay
for epoch in range(n_epoch):
    for i, data in enumerate(train_loader):
        loss_act = AverageMeter()
        acc_act = AverageMeter()
        
        gamma = 0.9
        lr = lr * gamma ** epoch
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        # Measure data loading time
        pose_2d = data['pose_2d']
        act_type = data['action_type'].to(device)
        motion_gt = data['future_pose_3d']
        batch, seq_len = pose_2d.size()[:2]
        _, future_seq_len = motion_gt.size()[:2]
        opt_dim = model_pos.encoder.opt_dim

        optimizer.zero_grad()
        with torch.no_grad():
            # reshape, for 3d poses and motion, ignore the hip joint
            pose_2d = pose_2d.to(device).view(batch, seq_len, -1)
            motion_gt = motion_gt[:, :, 1:, :].to(device).view(batch, future_seq_len, -1)
            pred = model_pos(pose_2d, motion_gt)
            pred_pose_3d = pred['past_pose']
            pred_motion_3d = pred['future_motion']
            pose = pred_pose_3d.view(batch, seq_len, 15, 3).permute(0, 3, 2, 1)
            motion = pred_motion_3d.view(batch, future_seq_len, 15, 3).permute(0, 3, 2, 1)
        sequence = torch.cat([pose, motion], -1)
        act_prediction = model_act(sequence)
        loss = criterion(act_prediction, act_type)
        loss.backward()
        optimizer.step()
    
        loss_act.update(loss.item())
        acc_act.update(acc(act_prediction.max(1)[1], act_type))

        if i % 300 == 0:
            print(f'ep: {epoch:3d} |iter: {i:4d} | loss: {loss_act.avg:.4f} | acc: {acc_act.avg:.3f}')
            
    # validation
    model_act.eval()
    with torch.no_grad():
        vloss_act = AverageMeter()
        vacc_act = AverageMeter()
        for i, data in enumerate(valid_loader):
            loss_act = AverageMeter()
            acc_act = AverageMeter()

            # Measure data loading time
            pose_2d = data['pose_2d']
            act_type = data['action_type'].to(device)
            motion_gt = data['future_pose_3d']
            batch, seq_len = pose_2d.size()[:2]
            _, future_seq_len = motion_gt.size()[:2]
            opt_dim = model_pos.encoder.opt_dim

            pose_2d = pose_2d.to(device).view(batch, seq_len, -1)
            motion_gt = motion_gt[:, :, 1:, :].to(device).view(batch, future_seq_len, -1)
            pred = model_pos(pose_2d, motion_gt)
            pred_pose_3d = pred['past_pose']
            pred_motion_3d = pred['future_motion']
            pose = pred_pose_3d.view(batch, seq_len, 15, 3).permute(0, 3, 2, 1)
            motion = pred_motion_3d.view(batch, future_seq_len, 15, 3).permute(0, 3, 2, 1)
            sequence = torch.cat([pose, motion], -1)
            act_prediction = model_act(sequence)
            vacc_act.update(acc(act_prediction.max(1)[1], act_type))
        print(f'ep: {epoch:3d} |iter: {0:4d} | loss: {0.0:.4f} | acc: {vacc_act.avg:.3f}\n')
        model_act.train()

ep:   0 |iter:    0 | loss: 2.7041 | acc: 0.062
ep:   0 |iter:  300 | loss: 1.3215 | acc: 0.586
ep:   0 |iter:  600 | loss: 1.3254 | acc: 0.539
ep:   0 |iter:  900 | loss: 0.9785 | acc: 0.688
ep:   0 |iter: 1200 | loss: 0.8919 | acc: 0.703
ep:   0 |iter: 1500 | loss: 0.9858 | acc: 0.656
ep:   0 |iter: 1800 | loss: 1.0120 | acc: 0.695
ep:   0 |iter: 2100 | loss: 1.0403 | acc: 0.656
ep:   0 |iter: 2400 | loss: 0.8329 | acc: 0.734
ep:   0 |iter: 2700 | loss: 1.0476 | acc: 0.703
ep:   0 |iter: 3000 | loss: 0.9087 | acc: 0.727
ep:   0 |iter:    0 | loss: 0.0000 | acc: 0.490

ep:   1 |iter:    0 | loss: 0.9015 | acc: 0.688
ep:   1 |iter:  300 | loss: 0.8773 | acc: 0.695
ep:   1 |iter:  600 | loss: 0.7983 | acc: 0.734
ep:   1 |iter:  900 | loss: 0.6760 | acc: 0.781
ep:   1 |iter: 1200 | loss: 0.7801 | acc: 0.727
ep:   1 |iter: 1500 | loss: 0.7037 | acc: 0.789
ep:   1 |iter: 1800 | loss: 0.7362 | acc: 0.727
ep:   1 |iter: 2100 | loss: 0.8453 | acc: 0.727
ep:   1 |iter: 2400 | loss: 0.7980 | ac