In [None]:
# default_exp core.transfer_model

In [None]:
#hide
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Transfer learning temporal model 
> Trains the temporal model from the Video Pose3D checkpoint made for inference. 

In [None]:
#export
import os
import time

import matplotlib
import numpy as np
import torch

matplotlib.use('Agg')
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn as nn

from runningpose.core.generators import ChunkedGenerator, UnchunkedGenerator
from runningpose.core.loss import mpjpe
from runningpose.core.model import TemporalModel
from runningpose.core.camera import normalize_screen_coordinates

### Load
#### Training data

In [None]:
#export
# Load the ground-truth 3D keypoints used for training.
print('Loading training dataset...')
# NOTE: allow_pickle=True unpickles arbitrary Python objects; only load
# archives from a trusted source.
# The context manager closes the .npz file handle once the data is in memory.
with np.load('data_3d_train.npz', allow_pickle=True) as archive_3d:
    # 'positions_3d' wraps a dict in an object array; .item() unwraps it.
    keypoints_3D = archive_3d['positions_3d'].item()
# Left/right joint indices of the 3D skeleton, handed to the batch generators.
joints_left = [3, 6, 7, 10, 12, 14, 16]
joints_right = [4, 8, 9, 11, 13, 15, 17]
print(keypoints_3D.keys())
print(keypoints_3D['Ioanna3_Camera1_170Hz_3D_keypoints'].shape)

Loading training dataset...
dict_keys(['Ioanna1_Camera1_170Hz_3D_keypoints', 'Ioanna2_Camera1_170Hz_3D_keypoints', 'Ioanna3_Camera1_170Hz_3D_keypoints', 'Josef1_Camera1_170Hz_3D_keypoints', 'Josef2_Camera1_170Hz_3D_keypoints', 'Josef3_Camera1_170Hz_3D_keypoints', 'Josef4_Camera1_170Hz_3D_keypoints', 'Ioanna1_Camera2_170Hz_3D_keypoints', 'Ioanna2_Camera2_170Hz_3D_keypoints', 'Ioanna3_Camera2_170Hz_3D_keypoints', 'Josef1_Camera2_170Hz_3D_keypoints', 'Josef2_Camera2_170Hz_3D_keypoints', 'Josef3_Camera2_170Hz_3D_keypoints', 'Josef4_Camera2_170Hz_3D_keypoints', 'Ioanna1_Camera3_170Hz_3D_keypoints', 'Ioanna2_Camera3_170Hz_3D_keypoints', 'Ioanna3_Camera3_170Hz_3D_keypoints', 'Josef1_Camera3_170Hz_3D_keypoints', 'Josef2_Camera3_170Hz_3D_keypoints', 'Josef3_Camera3_170Hz_3D_keypoints', 'Josef4_Camera3_170Hz_3D_keypoints'])
(374, 18, 3)


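The 3D data is a dictionary keyed by recording, where each value is an array of
shape (frames, keypoints, dim). The 2D detections loaded next are keyed by video;
each video holds a `custom` entry containing a list with one array of shape
(frames, keypoints, dim).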

In [None]:
#export
# Load the 2D detections (network input) for the training videos.
print('Loading 2D training detections...')
# NOTE: allow_pickle=True unpickles arbitrary Python objects; only load
# archives from a trusted source.
# The context manager closes the .npz file handle once the data is in memory.
with np.load('data_2d_custom_trainingdata.npz', allow_pickle=True) as archive_2d:
    keypoints_2D_metadata = archive_2d['metadata'].item()
    keypoints_2D = archive_2d['positions_2d'].item()
# 'keypoints_symmetry' holds the left keypoint indices first, then the right.
keypoints_2D_symmetry = keypoints_2D_metadata['keypoints_symmetry']
kps_left = list(keypoints_2D_symmetry[0])
kps_right = list(keypoints_2D_symmetry[1])
# Sort by video name so the iteration order is deterministic; the 2D and 3D
# entries are later paired by position.
keypoints_2D = dict(sorted(keypoints_2D.items()))
print(keypoints_2D.keys())
print(keypoints_2D['miqus1_Ioanna_01.avi']['custom'][0].shape)

Loading 2D training detections...
dict_keys(['miqus1_Ioanna_01.avi', 'miqus1_Ioanna_02.avi', 'miqus1_Ioanna_03.avi', 'miqus1_Josef_01.avi', 'miqus1_Josef_02.avi', 'miqus1_Josef_03.avi', 'miqus1_Josef_04.avi', 'miqus2_Ioanna_01.avi', 'miqus2_Ioanna_02.avi', 'miqus2_Ioanna_03.avi', 'miqus2_Josef_01.avi', 'miqus2_Josef_02.avi', 'miqus2_Josef_03.avi', 'miqus2_Josef_04.avi', 'miqus3_Ioanna_01.avi', 'miqus3_Ioanna_02.avi', 'miqus3_Ioanna_03.avi', 'miqus3_Josef_01.avi', 'miqus3_Josef_02.avi', 'miqus3_Josef_03.avi', 'miqus3_Josef_04.avi'])
(323, 17, 2)


#### Normalize training inputs
The resolution of the third camera is different because its videos were cropped.
During our data collection some other people ended up in the shot, which is why
we had to crop the video.

First convert the 3D data from millimeter -> meter.

In [None]:
#export
# Scale every 3D training recording from millimetres to metres, in place.
# NOTE: not idempotent - re-running this cell divides by 1000 again.
for positions_mm in keypoints_3D.values():
    positions_mm /= 1000

In [None]:
#export
# Cameras 1 and 2: normalize the 2D detections with the 1920x1088 frame size.
subjects = [
    'miqus1_Ioanna_01.avi',
    'miqus1_Ioanna_02.avi',
    'miqus1_Ioanna_03.avi',
    'miqus1_Josef_01.avi',
    'miqus1_Josef_02.avi',
    'miqus1_Josef_03.avi',
    'miqus1_Josef_04.avi',
    'miqus2_Ioanna_01.avi',
    'miqus2_Ioanna_02.avi',
    'miqus2_Ioanna_03.avi',
    'miqus2_Josef_01.avi',
    'miqus2_Josef_02.avi',
    'miqus2_Josef_03.avi',
    'miqus2_Josef_04.avi',
]
# Replace each stored clip with its normalized version.
# NOTE: not idempotent - re-running this cell normalizes the data twice.
for subject in subjects:
    detections = keypoints_2D[subject]
    for action in detections:
        clips = detections[action]
        for idx in range(len(clips)):
            clips[idx] = normalize_screen_coordinates(
                clips[idx], w=1920, h=1088
            )

In [None]:
#export
# Camera 3 videos that are normalized with a frame width of 1350.
subjects_extra_cut = ['miqus3_Ioanna_01.avi', 'miqus3_Josef_01.avi']
# NOTE: not idempotent - re-running this cell normalizes the data twice.
for subject in subjects_extra_cut:
    detections = keypoints_2D[subject]
    for action in detections:
        clips = detections[action]
        for idx in range(len(clips)):
            clips[idx] = normalize_screen_coordinates(
                clips[idx], w=1350, h=1088
            )

In [None]:
#export
# Remaining camera 3 videos, normalized with a frame width of 1480.
subjects_cut = [
    'miqus3_Ioanna_02.avi',
    'miqus3_Ioanna_03.avi',
    'miqus3_Josef_02.avi',
    'miqus3_Josef_03.avi',
    'miqus3_Josef_04.avi',
]
# NOTE: not idempotent - re-running this cell normalizes the data twice.
for subject in subjects_cut:
    detections = keypoints_2D[subject]
    for action in detections:
        clips = detections[action]
        for idx in range(len(clips)):
            clips[idx] = normalize_screen_coordinates(
                clips[idx], w=1480, h=1088
            )

#### Validation data

In [None]:
#export
# Load the ground-truth 3D keypoints used for validation.
print('Loading validation dataset...')
# NOTE: allow_pickle=True unpickles arbitrary Python objects; only load
# archives from a trusted source.
# The context manager closes the .npz file handle once the data is in memory.
with np.load('data_3d_val.npz', allow_pickle=True) as archive_3d_val:
    # 'positions_3d' wraps a dict in an object array; .item() unwraps it.
    keypoints_3D_val = archive_3d_val['positions_3d'].item()
print(keypoints_3D_val.keys())

Loading validation dataset...
dict_keys(['Tindra1_Camera1_170Hz_3D_keypoints', 'Tindra2_Camera1_170Hz_3D_keypoints', 'Tindra3_Camera1_170Hz_3D_keypoints', 'Tindra1_Camera2_170Hz_3D_keypoints', 'Tindra2_Camera2_170Hz_3D_keypoints', 'Tindra3_Camera2_170Hz_3D_keypoints', 'Tindra1_Camera3_170Hz_3D_keypoints', 'Tindra2_Camera3_170Hz_3D_keypoints', 'Tindra3_Camera3_170Hz_3D_keypoints'])


In [None]:
#export
# Load the 2D detections (network input) for the validation videos.
print('Loading 2D validation detections...')
# NOTE: allow_pickle=True unpickles arbitrary Python objects; only load
# archives from a trusted source.
# The context manager closes the .npz file handle once the data is in memory.
with np.load('data_2d_custom_validationdata.npz', allow_pickle=True) as archive_2d_val:
    keypoints_2D_val = archive_2d_val['positions_2d'].item()
# Sort by video name so the iteration order is deterministic; the 2D and 3D
# entries are later paired by position.
keypoints_2D_val = dict(sorted(keypoints_2D_val.items()))
print(keypoints_2D_val.keys())

Loading 2D training detections...
dict_keys(['miqus1_Tindra_01.avi', 'miqus1_Tindra_02.avi', 'miqus1_Tindra_03.avi', 'miqus2_Tindra_01.avi', 'miqus2_Tindra_02.avi', 'miqus2_Tindra_03.avi', 'miqus3_Tindra_01.avi', 'miqus3_Tindra_02.avi', 'miqus3_Tindra_03.avi'])


#### Normalize validation input

In [None]:
#export
# Scale every 3D validation recording from millimetres to metres, in place.
# NOTE: not idempotent - re-running this cell divides by 1000 again.
for positions_mm in keypoints_3D_val.values():
    positions_mm /= 1000

In [None]:
#export
# Cameras 1 and 2 (validation): normalize with the 1920x1088 frame size.
subjects = [
    'miqus1_Tindra_01.avi',
    'miqus1_Tindra_02.avi',
    'miqus1_Tindra_03.avi',
    'miqus2_Tindra_01.avi',
    'miqus2_Tindra_02.avi',
    'miqus2_Tindra_03.avi',
]
# Only the first two channels (x, y) are normalized, in place on the stored
# array, so any further channels are left untouched.
# NOTE: not idempotent - re-running this cell normalizes the data twice.
for subject in subjects:
    for clips in keypoints_2D_val[subject].values():
        for clip in clips:
            clip[..., :2] = normalize_screen_coordinates(
                clip[..., :2], w=1920, h=1088
            )

In [None]:
#export
# Camera 3 (validation): normalize with a frame width of 1480.
subjects_cut = [
    'miqus3_Tindra_01.avi',
    'miqus3_Tindra_02.avi',
    'miqus3_Tindra_03.avi',
]
# Only the first two channels (x, y) are normalized, in place on the stored
# array, so any further channels are left untouched.
# NOTE: not idempotent - re-running this cell normalizes the data twice.
for subject in subjects_cut:
    for clips in keypoints_2D_val[subject].values():
        for clip in clips:
            clip[..., :2] = normalize_screen_coordinates(
                clip[..., :2], w=1480, h=1088
            )

#### Check and fix errors in data
Assert that we have the same number of frames in the 2D and 3D data.

In [None]:
#export
# The miqus3_Josef_03 .avi was cut too long (4.44 instead of 4.42), so keep
# only its first 374 frames.
josef3_cam3_clips = keypoints_2D['miqus3_Josef_03.avi']['custom']
josef3_cam3_clips[0] = josef3_cam3_clips[0][:374]

In [None]:
#export
# Array shape of every 3D training recording, in dictionary order.
shapes_3d = [positions.shape for positions in keypoints_3D.values()]

##### Fix missing right foot values by inferring from a later frame
> We had a missing RForefoot2 in the start

In [None]:
#export
# Joint 9 is missing at the start of the Ioanna2 run, so frames 0-18 are
# filled with the value observed at frame 20. Each camera view is patched
# from its OWN recording: the camera 3 view previously borrowed the value from
# 'Ioanna3_Camera2', i.e. a different run seen from a different camera.
# NOTE(review): frame 19 is neither overwritten nor used as the source -
# confirm that this is intended.
for recording in (
    'Ioanna2_Camera1_170Hz_3D_keypoints',
    'Ioanna2_Camera2_170Hz_3D_keypoints',
    'Ioanna2_Camera3_170Hz_3D_keypoints',
):
    keypoints_3D[recording][:19, 9] = keypoints_3D[recording][20, 9]

In [None]:
#export
# Array shape of the first 'custom' clip of every 2D training video, in
# dictionary order.
shapes_2d = [entry['custom'][0].shape for entry in keypoints_2D.values()]

In [None]:
#export
# Every 2D clip must have the same number of frames as its 3D recording.
# Entries are paired by position, so first make sure none is left unpaired.
assert len(shapes_2d) == len(shapes_3d), (
    f'{len(shapes_2d)} 2D clips but {len(shapes_3d)} 3D recordings'
)
for i, (shape_2d, shape_3d) in enumerate(zip(shapes_2d, shapes_3d)):
    assert shape_2d[0] == shape_3d[0], f'subject {i}: {shape_2d[0]}, {shape_3d[0]}'

#### Root relative coordinates
> The 3D predictions should be root relative so we need to convert our 3D data. WaistBack is root. 

In [None]:
#export
# Make the 3D targets root-relative: subtract the root joint (index 5,
# WaistBack) from every joint of every frame, for training and validation.
# This cell is exported so the generated module trains on the same
# root-relative targets as the notebook.
root_joint = 5
for positions in list(keypoints_3D.values()) + list(keypoints_3D_val.values()):
    # Copy the root trajectory first: the in-place subtraction below also
    # overwrites (zeroes) the root joint itself.
    root = positions[:, root_joint:root_joint + 1, :].copy()
    positions -= root

Roll out all the data to lists for the generators to create batches. 

In [None]:
#export
poses_2d_train = []
for subject in keypoints_2D.keys():
    poses_2d_train.append(keypoints_2D[subject]['custom'][0])

poses_3d_train = []
for subject in keypoints_3D.keys():
    poses_3d_train.append(keypoints_3D[subject])

assert len(poses_2d_train) == len(poses_3d_train), "Number of runs doesn't match."

poses_2d_val = []
for subject in keypoints_2D_val.keys():
    poses_2d_val.append(keypoints_2D_val[subject]['custom'][0])

poses_3d_val = []
for subject in keypoints_3D_val.keys():
    poses_3d_val.append(keypoints_3D_val[subject])

assert len(poses_2d_val) == len(poses_3d_val), "Number of runs doesn't match."

#### Define the model
> Load the pretrained temporal model checkpoint that was trained for generating 3D predictions.

In [None]:
#export
# Load checkpoint
print('Loading checkpoint')
checkpoint = torch.load('pretrained_h36m_detectron_coco.bin', 
                        map_location=lambda storage,
                        loc: storage)
print('This model was trained for {} epochs'.format(checkpoint['epoch']))

Loading checkpoint
This model was trained for 80 epochs


> Initialize the final layer weights correctly. E.g. if you are regressing some values that have a mean of 50 then
initialize the final bias to 50. If you have an imbalanced dataset of a ratio 1:10 of positives:negatives, set the bias on
your logits such that your network predicts probability of 0.1 at initialization. Setting these correctly will speed up
convergence and eliminate “hockey stick” loss curves - Andrej Karpathy

In [None]:
#export
# Mean 3D position of every keypoint: average over the frames of each
# recording first, then over the recordings (each recording weighs the same,
# regardless of its length).
# This cell is exported because the exported model-setup code uses
# `mean_keypoints` to initialise the bias of the output layer.
per_subject_means = [
    np.mean(positions, axis=0) for positions in keypoints_3D.values()
]
mean_keypoints = sum(per_subject_means) / len(per_subject_means)
# Flatten (keypoints, dim) into one vector with keypoints * dim entries.
mean_keypoints = mean_keypoints.reshape(-1)

In [None]:
#export
# ---- Hyperparameters ----
num_joints_in = 17               # COCO
in_features = 2                  # dimension of in joints
num_joints_out = 18
filter_widths = [3, 3, 3, 3, 3]  # just as in inference
causal = False                   # No real time predictions
dropout = 0.25
channels = 1024                  # default
lr = 3e-4
lr_decay = 1
batch_size = 256
chunk_length = 1
num_epochs = 10
unfreeze_epoch = 2
trigger_times = 0
# NOTE: with num_epochs = 10 this patience can never be exceeded, so early
# stopping is effectively turned off.
patience = 600

# ---- Models ----
# Two identical networks: one that is trained and one that receives a copy of
# the trained weights for evaluation.
model_args = (
    num_joints_in, in_features, num_joints_out, filter_widths, causal,
    dropout, channels,
)
model_run_train = TemporalModel(*model_args)
model_run = TemporalModel(*model_args)

# The checkpoint's output layer does not fit the new number of output joints,
# so replace it: random weights, and a bias set to the mean keypoint positions.
# NOTE(review): torch.randn draws unit-variance weights over `channels` inputs;
# confirm this scale is intended alongside the mean-initialised bias.
pretrained_weights = checkpoint['model_pos']
pretrained_weights['shrink.weight'] = torch.randn(num_joints_out*3, channels, 1)
pretrained_weights['shrink.bias'] = torch.from_numpy(mean_keypoints)

# Transfer learning: start from the pretrained weights.
model_run_train.load_state_dict(pretrained_weights)

# Freeze everything except the freshly initialised output layer.
trainable_names = ('shrink.weight', 'shrink.bias')
for name, param in model_run_train.named_parameters():
    if name not in trainable_names:
        param.requires_grad = False

if torch.cuda.is_available():
    model_run_train = model_run_train.cuda()
    model_run = model_run.cuda()

In [None]:
#export
# The padding added on each side of a sequence is half the receptive field
# (excluding the centre frame).
receptive_field = model_run_train.receptive_field()
pad = (receptive_field - 1) // 2
print('INFO: Receptive field: {} frames'.format(receptive_field))

INFO: Receptive field: 243 frames


In [None]:
#export
# Adam (AMSGrad variant) over all parameters of the training model, plus a
# gradient scaler for the mixed-precision forward pass in the training loop.
optimizer = optim.Adam(model_run_train.parameters(), lr=lr, amsgrad=True)
scaler = torch.cuda.amp.GradScaler()

# Per-epoch loss history.
losses_3d_train, losses_3d_train_eval, losses_3d_valid = [], [], []

# BatchNorm momentum is decayed from the initial to the final value.
initial_momentum, final_momentum = 0.1, 0.001



#### Batch generators

In [None]:
#export
# Left/right index lists shared by the generators that take them.
symmetry = dict(
    kps_left=kps_left, kps_right=kps_right,
    joints_left=joints_left, joints_right=joints_right,
)

# Validation: whole sequences, no augmentation.
valid_generator = UnchunkedGenerator(
    cameras=None, poses_3d=poses_3d_val, poses_2d=poses_2d_val,
    pad=pad, augment=False, **symmetry
)
print('INFO: Testing on {} frames'.format(valid_generator.num_frames()))

# Training: shuffled, augmented chunks of `chunk_length` frames.
train_generator = ChunkedGenerator(
    batch_size, cameras=None, poses_3d=poses_3d_train,
    poses_2d=poses_2d_train, pad=pad, chunk_length=chunk_length,
    shuffle=True, augment=True, **symmetry
)
# The training data once more, as whole sequences without augmentation, for
# measuring the training error in evaluation mode.
train_generator_eval = UnchunkedGenerator(
    cameras=None, poses_3d=poses_3d_train, poses_2d=poses_2d_train,
    pad=pad, augment=False
)
print('INFO: Training on {} frames'.format(train_generator_eval.num_frames()))

INFO: Testing on 2414 frames
INFO: Training on 6426 frames


#### Training
Trains with frozen layers first, then moves on to training the whole network.

In [None]:
#export
# Fine-tuning loop.
#
# Each epoch: (1) one supervised pass over the training chunks, (2) copy the
# weights into `model_run` and evaluate it on the validation set and on the
# un-augmented training set, (3) log, decay the learning rate and the BatchNorm
# momentum. Up to `unfreeze_epoch` only the new output layer is trained;
# afterwards the whole network is.
#
# Early stopping is tracked with a flag instead of overwriting `epoch`, so the
# logged epoch, the plot file name and the `epoch` value left behind for the
# checkpoint cell all reflect the number of epochs actually completed.
epoch = 0
stop_training = False
while epoch < num_epochs and not stop_training:
    start_time = time.time()
    # Unfreezes the layers after a given epoch
    if epoch == unfreeze_epoch:
        print('Unfreezes the model after epoch:', epoch)
        for param in model_run_train.parameters():
            param.requires_grad = True

    # Initialize training loss
    epoch_loss_3d_train = 0
    N = 0
    # Regular supervised scenario
    for _, batch_3d, batch_2d in train_generator.next_epoch():
        inputs_3d = torch.from_numpy(batch_3d.astype('float32'))
        inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
        if torch.cuda.is_available():
            inputs_3d = inputs_3d.cuda()
            inputs_2d = inputs_2d.cuda()

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Predict 3D poses (forward) under mixed-precision autocast
        with torch.cuda.amp.autocast():
            predicted_3d_pos = model_run_train(inputs_2d)
            loss_3d_pos = mpjpe(predicted_3d_pos, inputs_3d)
            # Weight the batch loss by the batch size so the epoch value is a
            # per-sample average.
            N += inputs_3d.shape[0]
            epoch_loss_3d_train += inputs_3d.shape[0] * loss_3d_pos.item()

        # Backward (the scaler rescales the loss before backpropagation)
        scaler.scale(loss_3d_pos).backward()
        scaler.step(optimizer)
        scaler.update()

    # Total loss over one epoch
    losses_3d_train.append(epoch_loss_3d_train / N)

    # End-of-epoch evaluation
    with torch.no_grad():
        # Load the newly trained network
        model_run.load_state_dict(model_run_train.state_dict())
        model_run.eval()
        # Initialize validation loss
        epoch_loss_3d_valid = 0
        N = 0

        # Evaluate on validation dataset
        for _, batch_3d, batch_2d in valid_generator.next_epoch():
            inputs_3d_valid = torch.from_numpy(batch_3d.astype('float32'))
            inputs_2d_valid = torch.from_numpy(batch_2d.astype('float32'))
            if torch.cuda.is_available():
                inputs_3d_valid = inputs_3d_valid.cuda()
                inputs_2d_valid = inputs_2d_valid.cuda()

            # Predict 3D poses (forward)
            predicted_3d_pos = model_run(inputs_2d_valid)
            loss_3d_pos = mpjpe(predicted_3d_pos, inputs_3d_valid)
            N += inputs_3d_valid.shape[0]
            epoch_loss_3d_valid += inputs_3d_valid.shape[0] * loss_3d_pos.item()

        # Total loss over one epoch
        losses_3d_valid.append(epoch_loss_3d_valid / N)

        # Early stopping: count the epochs whose validation loss got worse.
        # NOTE: the counter is cumulative; it is not reset on improvement.
        if epoch > 1:
            if losses_3d_valid[-1] > losses_3d_valid[-2]:
                trigger_times += 1
                print('Trigger Times:', trigger_times, flush=True)

                if trigger_times > patience:
                    print('Early stopping! at epoch:', epoch+1)
                    # Finish this epoch's bookkeeping, then leave the loop.
                    stop_training = True

        # Evaluate on training set, this time in evaluation mode
        epoch_loss_3d_train_eval = 0
        N = 0
        for _, batch_3d, batch_2d in train_generator_eval.next_epoch():
            if batch_2d.shape[1] == 0:
                # This can only happen when downsampling the dataset
                continue

            inputs_3d = torch.from_numpy(batch_3d.astype('float32'))
            inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
            if torch.cuda.is_available():
                inputs_3d = inputs_3d.cuda()
                inputs_2d = inputs_2d.cuda()

            # Predict 3D poses (forward)
            predicted_3d_pos = model_run(inputs_2d)
            loss_3d_pos = mpjpe(predicted_3d_pos, inputs_3d)
            N += inputs_3d.shape[0]
            epoch_loss_3d_train_eval += inputs_3d.shape[0] * loss_3d_pos.item()

        # Total loss over one epoch
        losses_3d_train_eval.append(epoch_loss_3d_train_eval / N)

    # Calculate total training/validation time over one epoch
    elapsed = time.time() - start_time

    # Losses are stored in metres and reported in millimetres.
    print(
        f'''[{epoch+1}] time {elapsed:.2f} lr {lr} 
        3d_train {losses_3d_train[-1] * 1000} 
        3d_eval {losses_3d_train_eval[-1] * 1000} 
        3d_valid {losses_3d_valid[-1]  * 1000}''', 
        flush=True
    )

    # Decay learning rate exponentially
    lr *= lr_decay
    for param_group in optimizer.param_groups:
        param_group['lr'] *= lr_decay
    epoch += 1

    # Decay BatchNorm momentum
    momentum = initial_momentum * np.exp(
        -epoch/num_epochs * np.log(initial_momentum/final_momentum)
    )
    model_run_train.set_bn_momentum(momentum)

    # Save the training curves as a .png once training has finished (last
    # epoch reached or early stopping triggered). The first five epochs are
    # left out of the curves.
    if stop_training or epoch >= num_epochs:
        # savefig does not create missing directories.
        os.makedirs('loss_plots', exist_ok=True)
        plt.figure()
        epoch_x = np.arange(5, len(losses_3d_train)) + 1
        plt.plot(epoch_x, losses_3d_train[5:], '--', color='C0')
        plt.plot(epoch_x, losses_3d_train_eval[5:], color='C0')
        plt.plot(epoch_x, losses_3d_valid[5:], color='C1')
        plt.legend(['3d train', '3d train (eval)', '3d valid (eval)'])
        plt.ylabel('MPJPE (m)')
        plt.xlabel('Epoch')
        plt.xlim((3, epoch))
        plt.savefig('loss_plots/' + str(epoch) + '_loss_3d.png')
        plt.close('all')



KeyboardInterrupt: 

#### Save model

In [None]:
#export
# Write the fine-tuned weights to disk together with the epoch counter,
# learning rate, generator random state and optimizer state.
chk_path = 'runningpose_epoch_{}.bin'.format(epoch)
print(chk_path)
print('Saving checkpoint to', chk_path)
training_state = {
    'epoch': epoch,
    'lr': lr,
    'random_state': train_generator.random_state(),
    'optimizer': optimizer.state_dict(),
    'model_run': model_run_train.state_dict(),
}
torch.save(training_state, chk_path)

#### Visualize the predictions on one of our validation videos
> This is highly temporary

In [None]:
#hide
# Scratch cell: load a saved checkpoint, predict 3D poses for one video and
# render the reconstruction next to the input video as a .gif.
from runningpose.core.skeleton import Skeleton
# Skeleton of the 18-joint output; the left/right lists repeat the
# joints_left / joints_right values defined when loading the training data.
runningpose_skeleton = Skeleton(
    parents=[-1, 0, 1, 1, 1, 2, 5, 16, 5, 17, 3, 4, 10, 11, 6, 8, 14, 15],
    joints_left=[3, 6, 7, 10, 12, 14, 16], 
    joints_right=[4, 8, 9, 11, 13, 15, 17]
)
# NOTE(review): h36m_skeleton is not referenced again in this cell, and its
# joint lists go up to index 10 while `parents` has only 10 entries - confirm
# whether it can be removed.
h36m_skeleton = Skeleton(
       parents=[-1, 0, 1, 2, 3, 4, 0, 6, 7, 8],
       joints_left=[6, 7, 8, 9, 10],
       joints_right=[1, 2, 3, 4, 5]
)
# Load checkpoint
# NOTE(review): this file name differs from the 'runningpose_epoch_{}.bin'
# name written by the save cell - confirm which checkpoint is intended.
print('Loading checkpoint')
checkpoint = torch.load('runningpose_overtuned.bin', 
                        map_location=lambda storage,
                        loc: storage)
print('This model was trained for {} epochs'.format(checkpoint['epoch']))

# Rebuild the network with the training hyperparameters and load the weights
# stored under the 'model_run' key.
model_run = TemporalModel(
    num_joints_in, in_features, num_joints_out, filter_widths, causal, 
    dropout, channels
)
if torch.cuda.is_available():
    model_run = model_run.cuda()

model_run.load_state_dict(checkpoint['model_run'])

from runningpose.core.runningpose_dataset import runningpose_cameras_extrinsic_params
from runningpose.core.camera import camera_to_world_miqus, image_coordinates
from runningpose.core.visualization import render_animation
# NOTE(review): tindra_cam1 (validation video) is not used below; the
# predictions are made on josef_cam1, which comes from the training
# detections (keypoints_2D).
tindra_cam1 = keypoints_2D_val['miqus1_Tindra_01.avi']['custom']
josef_cam1 = keypoints_2D['miqus1_Josef_01.avi']['custom']
# Inference-only generator: no 3D targets are supplied.
gen = UnchunkedGenerator(
    cameras=None, poses_3d=None, poses_2d=josef_cam1,
    pad=pad, augment=False,
    kps_left=kps_left, kps_right=kps_right, 
    joints_left=joints_left, joints_right=joints_right
)
# Placeholders; overwritten in the loop so that later cells can inspect the
# raw predictions and their world-coordinate version.
predicted = 0
data_world = 0
print('Rendering...')
with torch.no_grad():
    # Put the loaded network in evaluation mode
    model_run.eval()

    for _, _, batch2d in gen.next_epoch():
        inputs_2d_valid = torch.from_numpy(batch2d.astype('float32'))
        if torch.cuda.is_available():
            inputs_2d_valid = inputs_2d_valid.cuda()

        # Predict 3D poses (forward)
        predicted_3d_pos = model_run(inputs_2d_valid)
        # Move the prediction to the CPU as a numpy array and drop the
        # leading (batch) axis.
        predicted_3d_pos = predicted_3d_pos.cpu().detach().numpy()[0]
        # Keep the untransformed prediction for inspection.
        predicted = predicted_3d_pos
        # Swap the first two axes so the loop below iterates over keypoints
        # - assumes the array is (frames, keypoints, 3); TODO confirm.
        predicted_3d_pos = predicted_3d_pos.transpose(1, 0, 2)
    
        # Get camera parameters.
        # Rotation of the first camera entry; T = 0 is passed in place of a
        # translation.
        R = runningpose_cameras_extrinsic_params[0]['rotation']
        T = 0 
        data_3D_world = []
        for keypoint in predicted_3d_pos:
            data_3D_world.append(camera_to_world_miqus(keypoint, R, T))

        # Undo the axis swap, then shift the third coordinate so that its
        # minimum over the whole sequence is 0.
        data_3D_world = np.array(data_3D_world).transpose(1, 0, 2)
        data_3D_world[:, :, 2] -= np.min(data_3D_world[:, :, 2])
        data_world = data_3D_world
        
        anim_output = {'Reconstruction': data_3D_world}
        # The stored detections are normalized; convert them back with the
        # same frame size (1920x1088) used when normalizing this camera.
        input_keypoints = image_coordinates(josef_cam1[0][..., :2], w=1920, h=1088)
        
        # NOTE(review): the positional arguments 85, 3000, 70 are passed
        # straight to render_animation; see its signature for their meaning.
        render_animation(
            input_keypoints, keypoints_2D_metadata, anim_output,
            runningpose_skeleton, 85, 3000, 70, 'josef01_cam1.gif', size=4, 
            input_video_path='Josef_01_Miqus_14.avi', viewport=(1920, 1088)
        )  

Loading checkpoint
This model was trained for 100 epochs
Rendering...


ffmpeg version 4.3.2 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 10.3.0 (GCC)
  configuration: --prefix=/home/conda/feedstock_root/build_artifacts/ffmpeg_1645955405450/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1645955405450/_build_env/bin/x86_64-conda-linux-gnu-cc --disable-doc --disable-openssl --enable-avresample --enable-gnutls --enable-gpl --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-libx264 --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame --pkg-config=/home/conda/feedstock_root/build_artifacts/ffmpeg_1645955405450/_build_env/bin/pkg-config
  libavutil      56. 51.100 / 56. 51.100
  libavcodec     58. 91.100 / 58. 91.100
  libavformat    58. 45.100 / 58. 45.100
  l

237/238

In [None]:
# Last frame of the ground-truth 3D keypoints for Josef, run 1, camera 1.
# (Named so that it does not shadow the builtin `input`.)
josef1_cam1_3d = keypoints_3D['Josef1_Camera1_170Hz_3D_keypoints']
josef1_cam1_3d[-1, :, :]

array([[ 0.42303429,  0.26015752,  0.45581345],
       [ 0.25422998,  0.29278402,  0.2693365 ],
       [ 0.08921538,  0.11156386,  0.06648251],
       [ 0.36620989,  0.25680363,  0.21468882],
       [ 0.14327012,  0.25385077,  0.37796204],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.13445541, -0.11719234, -0.11481342],
       [-0.30846427, -0.73945383, -0.50701398],
       [-0.03658132, -0.11472183,  0.17688657],
       [ 0.10396421, -0.73026476,  0.16168209],
       [ 0.56788958,  0.02446951,  0.20959372],
       [-0.09573019,  0.33995899,  0.22350433],
       [ 0.57400993,  0.19410068,  0.37322601],
       [-0.32357198,  0.29836445,  0.10136282],
       [-0.08741295, -0.41563311, -0.23919829],
       [ 0.22689401, -0.33543887,  0.23222618],
       [-0.30944753, -0.6124015 , -0.48556912],
       [ 0.06312181, -0.62748536,  0.091108  ]])

In [None]:
# Display the 2D detections for 'miqus1_Josef_01.avi' that were fed to the
# model above (the 'custom' entry of keypoints_2D, already normalized).
josef_cam1

In [None]:
# Last frame of the prediction after conversion to world coordinates.
data_world[-1]

array([[ 5.51457702e-01, -5.55888148e-02,  1.24585441e+00],
       [ 3.19046907e-01,  8.70417099e-02,  1.28859515e+00],
       [ 1.32917882e-01,  1.18994460e-01,  9.90692651e-01],
       [ 4.00851926e-01,  7.67496207e-02,  1.25796740e+00],
       [ 3.34549760e-01, -1.26451053e-01,  1.32418406e+00],
       [-1.26689397e-02,  4.75715769e-02,  1.04305712e+00],
       [ 4.17065565e-02,  1.44956873e-01,  9.39580740e-01],
       [-5.24454412e-01,  4.79791096e-02,  1.75782697e-01],
       [ 1.17671038e-01, -2.03634409e-01,  7.86256589e-01],
       [ 2.34132135e-01, -1.20842517e-01,  2.94283479e-01],
       [ 4.73213489e-01,  2.01231589e-01,  9.79725236e-01],
       [ 1.43376905e-01, -3.41055914e-01,  1.20111383e+00],
       [ 7.03693740e-01, -1.18819063e-03,  1.16567796e+00],
       [-2.42053273e-01, -3.30895073e-01,  1.20453803e+00],
       [-1.75004399e-01,  1.64514874e-01,  6.09590697e-01],
       [ 2.80531076e-01, -9.55331498e-02,  6.50585727e-01],
       [-4.64756086e-01,  8.89108347e-02

In [None]:
# Last frame of the raw model prediction (before the world conversion).
predicted[-1]

array([[ 0.37269884,  0.21561809,  0.44779631],
       [ 0.29915917,  0.2947046 ,  0.19378416],
       [ 0.17879465,  0.02178069,  0.00554004],
       [ 0.3518138 ,  0.2549355 ,  0.2519387 ],
       [ 0.16653976,  0.30958998,  0.36488825],
       [ 0.02425397,  0.0825498 , -0.03271024],
       [ 0.12842864, -0.01707054, -0.08140721],
       [-0.36677918, -0.72327644, -0.49295676],
       [-0.05451431, -0.20750384,  0.20330626],
       [ 0.07872485, -0.69974667,  0.15432128],
       [ 0.48455584, -0.01731333,  0.17187122],
       [-0.12155071,  0.18873145,  0.37696043],
       [ 0.52011716,  0.12510826,  0.49917048],
       [-0.39803568,  0.23317257,  0.11190882],
       [-0.02337512, -0.31968534, -0.28541583],
       [ 0.13613904, -0.34937933,  0.21507534],
       [-0.29164234, -0.52452433, -0.45539355],
       [ 0.13788489, -0.43928245, -0.01037715]], dtype=float32)