In [None]:
# default_exp core.transfer_model

In [None]:
#hide
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Transfer learning temporal model 
> Fine-tunes the temporal model, starting from the pretrained VideoPose3D checkpoint that was made for inference.

In [None]:
#export
import time
import os

import matplotlib
import numpy as np
import torch

matplotlib.use('Agg')
import matplotlib.pyplot as plt
from torch import optim

from runningpose.core.generators import ChunkedGenerator, UnchunkedGenerator
from runningpose.core.loss import mpjpe
from runningpose.core.model import TemporalModel
from runningpose.core.runningpose_dataset import RunningposeDataset


In [None]:
#export
# NOTE: dataset loading is currently disabled. `dataset_path` is still an
# empty placeholder, and the only later reference to `dataset` in this
# notebook (the joints_left / joints_right line) is commented out as well.
# print('Loading dataset...')
# dataset_path = ''
# dataset = RunningposeDataset(dataset_path)

# Convert to 3D camera coordinates
# TODO: Maybe just do this in format qtmdata.

In [None]:
#export
print('Loading 2D detections...')
# SECURITY: allow_pickle=True unpickles arbitrary Python objects stored in the
# archive; only load .npz files that come from a trusted source.
# The context manager closes the underlying file once both entries are read.
with np.load('data_2d_custom_tindra.npz', allow_pickle=True) as keypoints_archive:
    # .item() unwraps the single Python object stored in each entry
    # (both are used as dictionaries below).
    keypoints_metadata = keypoints_archive['metadata'].item()
    keypoints = keypoints_archive['positions_2d'].item()

# Index lists of the left/right 2D keypoints, handed to the batch generators.
keypoints_symmetry = keypoints_metadata['keypoints_symmetry']
kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
# joints_left, joints_right = list(dataset.skeleton().joints_left()), list(dataset.skeleton().joints_right())

# Inspect the structure using the first video in the file instead of a
# hard-coded file name, so the cell also works for other detection files.
print(keypoints.keys())
first_video = next(iter(keypoints))
print(keypoints[first_video].keys())
print(keypoints[first_video]['custom'][0].shape)

Loading 2D detections...
dict_keys(['Tindra_dynamic0001_Miqus_16_23604.avi', 'Tindra_dynamic0001_Miqus_15_21386.avi'])
dict_keys(['custom'])
(694, 17, 2)


The detections are stored in a dictionary keyed by video file name. Each video
entry has a `custom` key holding a list whose first element is an array of shape (frames, keypoints, dim), here (694, 17, 2).

#### Define the model
> Load the pretrained temporal model used for generating 3D predictions.

In [None]:
#export
# Restore the pretrained checkpoint. The identity map_location returns every
# storage unchanged instead of moving it to the device it was saved from.
print('Loading checkpoint')
checkpoint = torch.load(
    'pretrained_h36m_detectron_coco.bin',
    map_location=lambda storage, loc: storage,
)
print('This model was trained for {} epochs'.format(checkpoint['epoch']))
# Show which entries the checkpoint provides.
checkpoint.keys()

Loading checkpoint
This model was trained for 80 epochs


dict_keys(['epoch', 'lr', 'model_pos'])

In [None]:
#export
# --- Model and optimisation settings ---------------------------------------
num_joints_in = 17   # 2D input keypoints (COCO)
in_features = 2      # coordinates per input keypoint
num_joints_out = 20  # predicted joints (runningpose)
filter_widths = [3, 3, 3, 3, 3]  # same as used for inference
causal = False       # no real-time predictions needed
dropout = 0.25       # default
channels = 1024      # default
lr = 0.001           # default
lr_decay = 0.95      # default; lr is multiplied by this after every epoch
batch_size = 64
chunk_length = 1
num_epochs = 10

# Two identically configured networks, built in this order: the first one is
# optimised, the second one receives a copy of its weights for the
# end-of-epoch evaluation.
model_run_train, model_run = (
    TemporalModel(
        num_joints_in, in_features, num_joints_out, filter_widths, causal,
        dropout, channels
    )
    for _ in range(2)
)

if torch.cuda.is_available():
    model_run_train, model_run = model_run_train.cuda(), model_run.cuda()

# Start training from the pretrained weights.
# NOTE(review): load_state_dict is strict by default, so this presumably
# requires the checkpoint to have been trained with the same num_joints_out
# as configured above; confirm against the checkpoint.
model_run_train.load_state_dict(checkpoint['model_pos'])

<All keys matched successfully>

In [None]:
#export
# Calculate padding based on receptive field
# The receptive field is queried from the training model; the recorded run
# of this cell reported 243 frames.
receptive_field = model_run_train.receptive_field()
print('INFO: Receptive field: {} frames'.format(receptive_field))
# Split the context (receptive field minus one frame) evenly over both sides;
# this value is passed as `pad` to the unchunked generators.
pad = (receptive_field - 1) // 2 # Padding on each side

INFO: Receptive field: 243 frames


In [None]:
#export
# Adam (AMSGrad variant) over the parameters of the model being trained.
optimizer = optim.Adam(model_run_train.parameters(), amsgrad=True, lr=lr)
# Gradient scaler for the mixed-precision backward pass in the training loop.
scaler = torch.cuda.amp.GradScaler()

# Per-epoch loss histories: training mode, training set in eval mode, and
# validation set in eval mode.
losses_3d_train, losses_3d_train_eval, losses_3d_valid = [], [], []

# Start and end values of the BatchNorm momentum schedule.
initial_momentum, final_momentum = 0.1, 0.001

#### Batch generators

In [None]:
#export
# Builds the three batch generators used by the training loop: validation
# (unchunked), training (chunked, shuffled) and training-set evaluation
# (unchunked).
# NOTE(review): every camera/pose argument below is still a None placeholder;
# the recorded run of this cell ended in
# "TypeError: object of type 'NoneType' has no len()". Real data has to be
# passed in before these generators can be constructed.
# NOTE(review): this first call names its data arguments cameras_valid /
# poses_valid / poses_valid_2d, while train_generator_eval below uses
# cameras / poses_3d / poses_2d for the same class; confirm which keyword
# names UnchunkedGenerator actually accepts.
valid_generator = UnchunkedGenerator(
    cameras_valid=None, poses_valid=None, poses_valid_2d=None,
    pad=pad, augment=False,
    kps_left=kps_left, kps_right=kps_right, 
    joints_left=None, joints_right=None
)
print('INFO: Testing on {} frames'.format(valid_generator.num_frames()))
# Shuffled training batches of `chunk_length` frames, without augmentation.
train_generator = ChunkedGenerator(
    batch_size, cameras=None, poses_3d=None, poses_2d=None, 
    chunk_length=chunk_length, shuffle=True, augment=False, kps_left=kps_left, 
    kps_right=kps_right, joints_left=None, joints_right=None
)
# Unchunked pass over the training data, used to measure the training loss
# with the model in evaluation mode.
train_generator_eval = UnchunkedGenerator(
    cameras=None, poses_3d=None, poses_2d=None, pad=pad, augment=False
)
print('INFO: Training on {} frames'.format(train_generator_eval.num_frames()))

TypeError: object of type 'NoneType' has no len()

In [None]:
#export
def _evaluate_mpjpe(model, generator):
    """Return the frame-weighted mean MPJPE of `model` over one epoch of `generator`.

    The caller is expected to have put `model` in eval mode and to call this
    inside `torch.no_grad()`. Batches whose 2D input has no frames are skipped.
    Raises ZeroDivisionError if the generator yields no frames at all.
    """
    total_loss = 0
    total_frames = 0
    for _, batch_3d, batch_2d in generator.next_epoch():
        if batch_2d.shape[1] == 0:
            # This can only happen when downsampling the dataset
            continue

        targets_3d = torch.from_numpy(batch_3d.astype('float32'))
        inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
        if torch.cuda.is_available():
            targets_3d = targets_3d.cuda()
            inputs_2d = inputs_2d.cuda()
        # Same target preprocessing as in the training loop.
        targets_3d[:, :, 0] = 0

        # Predict 3D poses (forward)
        predicted_3d_pos = model(inputs_2d)
        loss_3d_pos = mpjpe(predicted_3d_pos, targets_3d)
        # Weight each batch by shape[0] * shape[1] of the target tensor.
        num_frames = targets_3d.shape[0] * targets_3d.shape[1]
        total_frames += num_frames
        total_loss += num_frames * loss_3d_pos.item()
    return total_loss / total_frames


# Fine-tuning loop. Each epoch:
#   1. optimises model_run_train on the chunked training batches (mixed precision),
#   2. copies the weights into model_run and measures MPJPE on the validation
#      and training generators in evaluation mode,
#   3. logs the losses, decays the learning rate and the BatchNorm momentum,
#      and saves the loss curves.
# `epoch` stays a module-level counter because the checkpoint cell reads it.
epoch = 0
while epoch < num_epochs:
    # `time` is imported as a module, so the function is time.time().
    start_time = time.time()
    # Running sums for the frame-weighted training loss
    epoch_loss_3d_train = 0
    N = 0
    # Regular supervised scenario
    for _, batch_3d, batch_2d in train_generator.next_epoch():
        inputs_3d = torch.from_numpy(batch_3d.astype('float32'))
        inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
        if torch.cuda.is_available():
            inputs_3d = inputs_3d.cuda()
            inputs_2d = inputs_2d.cuda()
        # Zero index 0 along the third axis of the targets
        # (presumably the root joint, making the loss root-relative; confirm).
        inputs_3d[:, :, 0] = 0

        optimizer.zero_grad()

        # Predict 3D poses (forward) under automatic mixed precision
        with torch.cuda.amp.autocast():
            predicted_3d_pos = model_run_train(inputs_2d)
            loss_3d_pos = mpjpe(predicted_3d_pos, inputs_3d)

        # Accumulate the epoch loss, weighted by shape[0] * shape[1]
        num_frames = inputs_3d.shape[0] * inputs_3d.shape[1]
        N += num_frames
        epoch_loss_3d_train += num_frames * loss_3d_pos.item()

        # Backward: scale the loss, step the optimizer through the scaler and
        # then update the scale factor. update() belongs to the GradScaler;
        # torch optimizers have no update() method.
        loss_total = loss_3d_pos
        scaler.scale(loss_total).backward()
        scaler.step(optimizer)
        scaler.update()

    # Total loss over one epoch
    losses_3d_train.append(epoch_loss_3d_train / N)

    # End-of-epoch evaluation
    with torch.no_grad():
        # Load the newly trained network
        model_run.load_state_dict(model_run_train.state_dict())
        model_run.eval()

        # Evaluate on validation dataset
        losses_3d_valid.append(_evaluate_mpjpe(model_run, valid_generator))

        # Evaluate on training set, this time in evaluation mode. Each loss
        # now goes to its own history; previously this pass added to the
        # validation accumulator and the train-eval loss was always 0.
        losses_3d_train_eval.append(
            _evaluate_mpjpe(model_run, train_generator_eval)
        )

    # Calculate total training/validation time over one epoch (minutes)
    elapsed = (time.time() - start_time)/60

    print(
        f'''[{epoch+1}] time {elapsed:.2f} lr {lr} 
        3d_train {losses_3d_train[-1] * 1000} 
        3d_eval {losses_3d_train_eval[-1] * 1000} 
        3d_valid {losses_3d_valid[-1]  *1000}'''
    )

    # Decay learning rate exponentially
    lr *= lr_decay
    for param_group in optimizer.param_groups:
        param_group['lr'] *= lr_decay
    epoch += 1

    # Decay BatchNorm momentum from initial_momentum towards final_momentum
    momentum = initial_momentum * np.exp(
        -epoch/num_epochs * np.log(initial_momentum/final_momentum)
    )
    model_run_train.set_bn_momentum(momentum)

    # Save training curves as .png images. The curves skip the first three
    # epochs, so there is nothing to draw until epoch 4.
    if epoch > 3:
        plt.figure()
        epoch_x = np.arange(3, len(losses_3d_train)) + 1
        plt.plot(epoch_x, losses_3d_train[3:], '--', color='C0')
        plt.plot(epoch_x, losses_3d_train_eval[3:], color='C0')
        plt.plot(epoch_x, losses_3d_valid[3:], color='C1')
        plt.legend(['3d train', '3d train (eval)', '3d valid (eval)'])
        plt.ylabel('MPJPE (m)')
        plt.xlabel('Epoch')
        plt.xlim((3, epoch))
        # The image is written into a directory named after the epoch; create
        # it first, otherwise savefig fails because the directory is missing.
        plot_dir = str(epoch)
        os.makedirs(plot_dir, exist_ok=True)
        plt.savefig(os.path.join(plot_dir, '_loss_3d.png'))
        plt.close('all')

#### Save model

In [None]:
# Write the final training state to disk: epoch counter, current learning
# rate, the training generator's random state, the optimizer state and the
# weights of the trained model.
# NOTE(review): the weights are stored under 'model_run', while the pretrained
# checkpoint loaded earlier keeps them under 'model_pos'; code that reloads
# this file has to use the matching key.
chk_path = os.path.join('runningpose', '_epoch_{}.bin'.format(epoch))
print('Saving checkpoint to', chk_path)
checkpoint_state = {
    'epoch': epoch,
    'lr': lr,
    'random_state': train_generator.random_state(),
    'optimizer': optimizer.state_dict(),
    'model_run': model_run_train.state_dict(),
}
torch.save(checkpoint_state, chk_path)