# Pose Estimation Training

In [3]:
import argparse
import time
import os
import numpy as np
from collections import OrderedDict

import mxnet as mx
from mxnet import gluon, autograd, nd
from mxnet.gluon import nn, loss

from network.rtpose_vgg import get_model
from training.datasets.coco import get_loader
from mxboard import SummaryWriter    

### Configuration

In [7]:
# --- Dataset and output locations -------------------------------------------
json_path = 'training/dataset/COCO/COCO.json'
data_dir = 'training/dataset/COCO/images'
mask_dir = 'training/dataset/COCO/mask'
model_path = 'model_checkpoints/'   # checkpoints are read from and written to this directory
logdir = 'logs'                     # directory handed to the SummaryWriter

# --- Warm start and logging tag ---------------------------------------------
# An empty string for load_model means "do not load a checkpoint".
load_model = 'lr_0.1_wd_0.00001_momentum_0.8_ft_vgg_pose_1.params'
log_key = 'notebook_tests'          # prefix for checkpoint names and key for logged scalars

# --- Optimiser settings -----------------------------------------------------
optim = 'sgd'                       # selects the optimiser in the fine-tuning cell
lr = 1.
momentum = 0.9
wd = 0.0
nesterov = False                    # NOTE(review): not referenced elsewhere in the visible notebook

# --- Schedule ---------------------------------------------------------------
epochs_pre = 5                      # epochs in the first training stage
epochs_ft = 5                       # epochs in the fine-tuning stage
batch_size = 8
print_freq = 20                     # progress is printed every print_freq training batches

# --- Device -----------------------------------------------------------------
# Only the first listed GPU is used for now; a leading -1 selects the CPU.
gpuIDs = [0]
if gpuIDs[0] != -1:
    ctx = mx.gpu(gpuIDs[0])
else:
    ctx = mx.cpu()

# Augmentation / target-generation settings passed to get_loader as
# ``params_transform``. Keys are consumed by the dataset code, so they are kept
# exactly as spelled (including 'center_perterb_max').
params_transform = {
    'mode': 5,
    # === aug_scale ===
    'scale_min': 0.5,
    'scale_max': 1.1,
    'scale_prob': 1,
    'target_dist': 0.6,
    # === aug_rotate ===
    'max_rotate_degree': 40,
    # ===
    'center_perterb_max': 40,
    # === aug_flip ===
    'flip_prob': 0.5,
    'np': 56,
    'sigma': 7.0,
    'limb_width': 1.,
}

### Helper classes and functions

In [8]:
class AverageMeter(object):
    """Tracks the latest value of a quantity and its running weighted mean.

    Attributes:
        val: most recent value passed to ``update``.
        sum: weighted sum of all values seen since the last ``reset``.
        count: total weight (number of observations) seen since the last ``reset``.
        avg: ``sum / count``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set all four statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, counted as ``n`` observations."""
        self.count += n
        self.sum += val * n
        self.val = val
        self.avg = self.sum / self.count
        
def build_names():
    """Return the loss labels 'loss_stage<j>_L<k>' for j in 1..6 and k in 1..2.

    Labels are ordered stage by stage, with L1 before L2 inside each stage.
    """
    return ['loss_stage%d_L%d' % (stage, branch)
            for stage in range(1, 7)
            for branch in range(1, 3)]

#### Loss function

In [9]:
def get_loss(saved_for_loss, heat_temp, heat_weight,
               vec_temp, vec_weight):
    """Sum the masked L2 losses of all six stages and collect logging scalars.

    Args:
        saved_for_loss: indexable sequence of network outputs. For stage ``j``
            (0-based) entry ``2*j`` is compared against ``vec_temp`` and entry
            ``2*j + 1`` against ``heat_temp``. The last entry is sliced with
            four indices, so it is expected to be 4-D.
        heat_temp: target for the odd-indexed (heatmap) outputs.
        heat_weight: mask multiplied into both heatmap prediction and target.
        vec_temp: target for the even-indexed (PAF) outputs.
        vec_weight: mask multiplied into both PAF prediction and target.

    Returns:
        ``(total_loss, saved_for_log)``. ``total_loss`` is the sum of the 12
        ``gluon.loss.L2Loss`` results. ``saved_for_log`` is an ``OrderedDict``
        with the scalar mean of each stage loss (keys from ``build_names()``)
        followed by ``max_ht``, ``min_ht``, ``max_paf`` and ``min_paf``,
        computed from the final stage's outputs.
    """
    names = build_names()
    saved_for_log = OrderedDict()
    loss_fn = gluon.loss.L2Loss()
    total_loss = 0

    # The masked targets do not depend on the stage, so build them once
    # instead of once per stage.
    gt_paf = vec_temp * vec_weight
    gt_heat = heat_weight * heat_temp

    for j in range(6):
        pred_paf = saved_for_loss[2 * j] * vec_weight
        pred_heat = saved_for_loss[2 * j + 1] * heat_weight
        # Compute losses
        loss_paf = loss_fn(pred_paf, gt_paf)
        loss_heat = loss_fn(pred_heat, gt_heat)
        total_loss = total_loss + loss_paf
        total_loss = total_loss + loss_heat
        saved_for_log[names[2 * j]] = loss_paf.mean().asscalar()
        saved_for_log[names[2 * j + 1]] = loss_heat.mean().asscalar()

    # Copy each final-stage output to host memory once and reuse it for both
    # extrema. The heatmap statistics skip the last channel (0:-1 on axis 1);
    # presumably that channel is the background map -- TODO confirm.
    final_heat = saved_for_loss[-1][:, 0:-1, :, :].asnumpy()
    final_paf = saved_for_loss[-2].asnumpy()
    saved_for_log['max_ht'] = final_heat.max()
    saved_for_log['min_ht'] = final_heat.min()
    saved_for_log['max_paf'] = final_paf.max()
    saved_for_log['min_paf'] = final_paf.min()

    return total_loss, saved_for_log

### Training and evaluation loops

In [10]:
def run_epoch(iterator, model, epoch, is_train=True, trainer_vgg=None, trainer_pose=None):
    """Run one pass over ``iterator`` and return the average loss.

    Reads the notebook-level globals ``ctx`` and ``print_freq``. In training
    mode it also reads ``writer`` and ``log_key`` when logging progress, so the
    SummaryWriter must exist before the first training call.

    Args:
        iterator: sized iterable yielding
            ``(img, heatmap_target, heat_mask, paf_target, paf_mask)`` batches.
        model: network called as ``model(img)``; the second element of its
            return value is passed to ``get_loss``.
        epoch: epoch index, used in progress output and to compute the global
            step for logged scalars.
        is_train: when True, gradients are recorded and both trainers step
            after every batch. When False, only the forward pass and the loss
            are computed, in inference mode and without recording a graph.
        trainer_vgg: first trainer to step; required when ``is_train`` is True.
        trainer_pose: second trainer to step; required when ``is_train`` is True.

    Returns:
        The sample-weighted average over all batches of the batch-mean loss
        (0 if ``iterator`` yields nothing).

    Raises:
        ValueError: if ``is_train`` is True and either trainer is missing.
    """
    if is_train and (trainer_vgg is None or trainer_pose is None):
        raise ValueError('trainer_vgg and trainer_pose are required when is_train=True')

    batch_time = AverageMeter()
    losses = AverageMeter()

    meter_dict = {name: AverageMeter() for name in build_names()}
    for name in ('max_ht', 'min_ht', 'max_paf', 'min_paf'):
        meter_dict[name] = AverageMeter()

    end = time.time()

    for i, (img, heatmap_target, heat_mask, paf_target, paf_mask) in enumerate(iterator):
        img = img.as_in_context(ctx)
        heatmap_target = heatmap_target.as_in_context(ctx)
        heat_mask = heat_mask.as_in_context(ctx)
        paf_target = paf_target.as_in_context(ctx)
        paf_mask = paf_mask.as_in_context(ctx)
        num_samples = img.shape[0]

        # Only record a graph when it will be used by backward(). Evaluation
        # runs in inference mode without recording, which avoids keeping the
        # intermediate buffers of every stage alive.
        scope = autograd.record() if is_train else autograd.predict_mode()
        with scope:
            # compute output
            _, saved_for_loss = model(img)
            total_loss, saved_for_log = get_loss(saved_for_loss, heatmap_target, heat_mask,
                                                 paf_target, paf_mask)

        # Metric bookkeeping works on plain scalars; it does not need to be
        # inside the recording scope.
        for name, meter in meter_dict.items():
            meter.update(saved_for_log[name], num_samples)
        losses.update(total_loss.mean().asscalar(), num_samples)

        if is_train:
            total_loss.backward()
            # step(batch_size) rescales the accumulated gradients by 1/batch_size.
            trainer_vgg.step(num_samples)
            trainer_pose.step(num_samples)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if is_train and i % print_freq == 0:
            num_batches = len(iterator)
            global_step = i + epoch * num_batches
            print('Epoch: [{0}][{1}/{2}]\t'.format(epoch, i, num_batches))
            # NOTE: despite the label, batch_time covers the whole iteration
            # (data loading, forward, backward and optimiser step).
            print('Data time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format( batch_time=batch_time))
            print('Loss {loss.val:.4f} ({loss.avg:.4f})'.format(loss=losses))
            writer.add_scalar('data/max_ht', {log_key: meter_dict['max_ht'].avg}, global_step)
            writer.add_scalar('data/max_paf', {log_key: meter_dict['max_paf'].avg}, global_step)
            writer.add_scalar('data/loss', {log_key: losses.avg}, global_step)
            for name, value in meter_dict.items():
                print('{name}: {loss.val:.4f} ({loss.avg:.4f})\t'.format(name=name, loss=value))
            writer.flush()
    return losses.avg
    

#### Data Loading

In [11]:
print("Loading dataset...")
# Training loader: shuffled, built with training=True.
# NOTE(review): 368 and 8 are passed positionally; presumably the network input
# size and the feature stride -- confirm against get_loader's signature.
# The sixth and seventh positional arguments ('vgg', batch_size) are presumably
# the same `preprocess` and `batch_size` parameters that the validation call
# below passes by keyword -- verify before converting them to keywords.
train_data = get_loader(json_path, data_dir,
                        mask_dir, 368, 8,
                        'vgg', batch_size, params_transform = params_transform, 
                        shuffle=True, training=True, num_workers=8)
# _dataset is a private attribute of the returned loader; it is used here only
# to report the number of samples.
print('train dataset len: {}'.format(len(train_data._dataset)))

# Validation loader: same annotation file, image/mask directories and
# params_transform as above, but unshuffled and built with training=False.
valid_data = get_loader(json_path, data_dir, mask_dir, 368,
                            8, preprocess='vgg', training=False,
                            batch_size=batch_size, params_transform = params_transform, shuffle=False, num_workers=8)
print('val dataset len: {}'.format(len(valid_data._dataset)))



Loading dataset...
train dataset len: 121522
val dataset len: 4873


### Creating the network

In [12]:
# Build the network with a VGG-19 trunk.
model = get_model(trunk='vgg19')
# Place all parameters on the configured device before any weights are loaded.
model.collect_params().reset_ctx(ctx)
# Optional warm start: an empty load_model skips this; otherwise the file is
# looked up inside model_path and loaded directly onto ctx.
if load_model != '':
    model.load_parameters(os.path.join(model_path, load_model), ctx=ctx)
# Hybridize last, after the parameters are in place.
model.hybridize()

### Training first with the backbone fixed

In [None]:
# Stage 1: keep the pre-trained VGG weights fixed for now. trainer_vgg is only
# given the parameters of model.model0 whose names match '.*CPM.*';
# trainer_pose is given the parameters matching 'block.*'.
trainer_vgg = gluon.Trainer(
    model.model0.collect_params('.*CPM.*'),
    'sgd',
    dict(learning_rate=lr, momentum=momentum, wd=wd),
)
trainer_pose = gluon.Trainer(
    model.collect_params('block.*'),
    'sgd',
    dict(learning_rate=lr, momentum=momentum, wd=wd),
)

# This writer is also used by run_epoch and by the fine-tuning cell, and is
# closed in the notebook's last cell.
writer = SummaryWriter(logdir=logdir)

for epoch in range(epochs_pre):
    # One training pass, then checkpoint the weights for this epoch.
    train_loss = run_epoch(train_data, model, epoch, is_train=True,
                           trainer_vgg=trainer_vgg, trainer_pose=trainer_pose)
    model.save_parameters(
        os.path.join(model_path, ''.join([log_key, '_vgg_pose_', str(epoch), '.params'])))

    # One evaluation pass over the validation set (no parameter updates).
    val_loss = run_epoch(valid_data, model, epoch, is_train=False)

    writer.add_scalar('epoch/train_loss', {log_key: train_loss}, epoch)
    writer.add_scalar('epoch/val_loss', {log_key: val_loss}, epoch)

### Fine-tuning the model

In [None]:
# Stage 2: fine-tune. trainer_vgg now covers every parameter of model.model0
# (not only the '.*CPM.*' subset used in the first stage); trainer_pose is
# unchanged in scope.
if optim == 'sgd':
    ft_optimizer_params = {'learning_rate': lr, 'momentum': momentum, 'wd': wd}
elif optim == 'adam':
    ft_optimizer_params = {'learning_rate': lr, 'wd': wd}
else:
    # Raising a bare string is itself a TypeError in Python 3, which would hide
    # the real problem; raise a proper exception carrying the message instead.
    raise ValueError("Unknown optim " + optim)

# Each trainer gets its own copy of the settings.
trainer_vgg = gluon.Trainer(model.model0.collect_params(), optim, dict(ft_optimizer_params))
trainer_pose = gluon.Trainer(model.collect_params('block.*'), optim, dict(ft_optimizer_params))

# Tag fine-tuning checkpoints and scalars separately from the first stage.
# Guarded so that re-running this cell does not keep appending '_ft'.
if not log_key.endswith('_ft'):
    log_key += '_ft'

# Epoch numbering continues where the first stage stopped.
for epoch in range(epochs_pre, epochs_pre + epochs_ft):
    # train for one epoch
    train_loss = run_epoch(train_data, model, epoch, is_train=True,
                           trainer_vgg=trainer_vgg, trainer_pose=trainer_pose)
    model.save_parameters(os.path.join(model_path, log_key + '_vgg_pose_' + str(epoch) + '.params'))
    # evaluate on validation set
    val_loss = run_epoch(valid_data, model, epoch, is_train=False)

    writer.add_scalar('epoch_ft/train_loss', {log_key: train_loss}, epoch)
    writer.add_scalar('epoch_ft/val_loss', {log_key: val_loss}, epoch)

In [None]:
# Close the SummaryWriter created in the first training cell once all logging is done.
writer.close()    