In [57]:
import torch
import pickle
import pandas as pd
import os
from PIL import Image, ImageOps
import numpy as np
from utils import normalize_rgb, render_meshes, get_focalLength_from_fieldOfView, demo_color as color, print_distance_on_image, render_side_views, create_scene, MEAN_PARAMS, CACHE_DIR_MULTIHMR, SMPLX_DIR
from model import Model

# Functions from demo.py

In [58]:
def open_image(img_path, img_size, device=torch.device('cuda')):
    """Load an image, fit it into an ``img_size`` x ``img_size`` square and turn it into a batched tensor.

    Args:
        img_path: Path of the image file to open.
        img_size: Side length, in pixels, of the square canvas.
        device: Torch device the returned tensor is moved to.

    Returns:
        A tuple ``(x, img_pil_bis)`` where ``x`` is the ``normalize_rgb`` output of the
        padded image with a leading batch dimension, placed on ``device``, and
        ``img_pil_bis`` is a PIL image of the same content padded with white, meant
        for visualisation.
    """
    square = (img_size, img_size)

    # Decode as RGB, then resize to fit inside the square while keeping the aspect ratio.
    fitted = ImageOps.contain(Image.open(img_path).convert('RGB'), square)

    # White-padded copy, only used for visualisation.
    img_for_visu = ImageOps.pad(fitted.copy(), size=square, color=(255, 255, 255))

    # Network input: padded with PIL's default fill (zero padding per the original author's note).
    model_input = ImageOps.pad(fitted, size=square)

    # numpy -> normalisation -> torch, with a batch dimension in front.
    normalized = normalize_rgb(np.asarray(model_input))
    batch = torch.from_numpy(normalized).unsqueeze(0).to(device)
    return batch, img_for_visu

def load_model(model_name, device=torch.device('cuda')):
    """Open a checkpoint, build Multi-HMR using its saved arguments, and load the model weights.

    If ``<CACHE_DIR_MULTIHMR>/<model_name>.pt`` does not exist yet, it is first
    downloaded with ``wget`` from the NAVER LABS Europe website.

    Args:
        model_name: Checkpoint name without the ``.pt`` extension.
        device: Torch device used as ``map_location`` and for the built model.

    Returns:
        The ``Model`` instance with the checkpoint weights loaded (``strict=False``).

    Raises:
        RuntimeError: If the checkpoint is missing and the download fails.
    """
    import subprocess  # local import: only needed on the first-run download path

    # Model
    ckpt_path = os.path.join(CACHE_DIR_MULTIHMR, model_name + '.pt')
    if not os.path.isfile(ckpt_path):
        os.makedirs(CACHE_DIR_MULTIHMR, exist_ok=True)
        print(f"{ckpt_path} not found...")
        print("It should be the first time you run the demo code")
        print("Downloading checkpoint from NAVER LABS Europe website...")

        url = f"https://download.europe.naverlabs.com/ComputerVision/MultiHMR/{model_name}.pt"
        # Download to a temporary name so that an aborted transfer never leaves a
        # truncated file at ckpt_path (which would pass the isfile() check next time).
        tmp_path = ckpt_path + '.part'
        try:
            # Argument list (no shell) + check=True: a non-zero wget exit status raises
            # instead of being silently ignored, and model_name cannot inject shell syntax.
            subprocess.run(["wget", "-O", tmp_path, url], check=True)
            os.replace(tmp_path, ckpt_path)
        except (subprocess.CalledProcessError, OSError) as err:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise RuntimeError(
                f"Could not download {url}. "
                "Please contact fabien.baradel@naverlabs.com or open an issue on the github repo"
            ) from err
        print(f"Ckpt downloaded to {ckpt_path}")

    # Load weights
    print("Loading model")
    # NOTE(review): the checkpoint stores a Python object under 'args', so it is
    # unpickled by torch.load - only load checkpoints from a trusted source.
    ckpt = torch.load(ckpt_path, map_location=device)

    # Get arguments saved in the checkpoint to rebuild the model
    kwargs = dict(vars(ckpt['args']))

    # Build the model.
    kwargs['type'] = ckpt['args'].train_return_type
    kwargs['img_size'] = ckpt['args'].img_size[0]
    model = Model(**kwargs).to(device)

    # Load weights into model.
    model.load_state_dict(ckpt['model_state_dict'], strict=False)
    print("Weights have been loaded")

    return model

def forward_model(model, input_image, camera_parameters,
                  det_thresh=0.3,
                  nms_kernel_size=1,
                 ):
    """Run one inference pass of ``model`` on an image and its camera parameters.

    Args:
        model: Callable invoked as ``model(input_image, is_training=..., nms_kernel_size=...,
            det_thresh=..., K=...)``.
        input_image: First positional argument forwarded to ``model``.
        camera_parameters: Forwarded to ``model`` as ``K``.
        det_thresh: Detection threshold forwarded unchanged.
        nms_kernel_size: Cast to ``int`` before being forwarded.

    Returns:
        Whatever ``model`` returns.
    """
    inference_kwargs = dict(
        is_training=False,
        nms_kernel_size=int(nms_kernel_size),
        det_thresh=det_thresh,
        K=camera_parameters,
    )

    # Inference only: gradients disabled, CUDA autocast enabled.
    with torch.no_grad(), torch.cuda.amp.autocast(enabled=True):
        return model(input_image, **inference_kwargs)

def get_camera_parameters(img_size, fov=60, p_x=None, p_y=None, device=torch.device('cuda')):
    """Build the batched 3x3 camera matrix K from image size, field of view and principal point.

    Args:
        img_size: Image side length; used for the focal length and to place the principal point.
        fov: Field of view passed to ``get_focalLength_from_fieldOfView``.
        p_x, p_y: Principal point as fractions of ``img_size``. They are used only when
            both are given; otherwise the principal point is ``img_size // 2`` on each axis.
        device: Torch device the matrix is moved to.

    Returns:
        A ``(1, 3, 3)`` tensor on ``device``.
    """
    focal = get_focalLength_from_fieldOfView(fov=fov, img_size=img_size)

    # Principal point: explicit only if both coordinates were supplied, else the image centre.
    if p_x is None or p_y is None:
        c_x = c_y = img_size // 2
    else:
        c_x, c_y = p_x * img_size, p_y * img_size

    K = torch.eye(3)
    K[0, 0] = focal
    K[1, 1] = focal
    K[0, 2] = c_x
    K[1, 2] = c_y

    # Add the batch dimension and move to the requested device.
    return K.unsqueeze(0).to(device)

def overlay_human_meshes(humans, K, model, img_pil, unique_color=False):
    """Render the predicted meshes of ``humans`` on top of ``img_pil``.

    Args:
        humans: Sequence of per-person dicts; each must provide a ``'verts_smplx'`` tensor.
        K: Batched camera matrix; focal lengths and principal point are read from ``K[0]``.
        model: Object exposing ``smpl_layer['neutral'].bm_x.faces`` (mesh faces).
        img_pil: Image to draw on (converted with ``np.asarray``).
        unique_color: If truthy, every mesh uses ``color[0]``; otherwise the module-level
            ``color`` palette is passed to the renderer as is.

    Returns:
        A tuple ``(pred_rend_array, _color)``: the ``render_meshes`` output and the
        colour specification that was used.
    """
    n_humans = len(humans)

    # Colour of the humans seen in the image.
    if unique_color:
        mesh_colors = [color[0] for _ in range(n_humans)]
    else:
        mesh_colors = color

    # Focal lengths and principal point for the renderer, taken from the first batch entry.
    intrinsics = K[0]
    camera = {
        'focal': np.asarray([intrinsics[0, 0].cpu().numpy(), intrinsics[1, 1].cpu().numpy()]),
        'princpt': np.asarray([intrinsics[0, -1].cpu().numpy(), intrinsics[1, -1].cpu().numpy()]),
    }

    # One vertex array and one (shared) face array per detected person.
    verts_list = [person['verts_smplx'].cpu().numpy() for person in humans]
    faces_list = [model.smpl_layer['neutral'].bm_x.faces for _ in range(n_humans)]

    # Render the meshes onto the image.
    rendered = render_meshes(
        np.asarray(img_pil),
        verts_list,
        faces_list,
        camera,
        alpha=1.0,
        color=mesh_colors,
    )
    return rendered, mesh_colors


# Load Model

In [59]:
# Build Multi-HMR from the 'multiHMR_896_L' checkpoint on the default CUDA device;
# load_model downloads the checkpoint into CACHE_DIR_MULTIHMR if it is not there yet.
model = load_model('multiHMR_896_L')

Loading model


Using cache found in /home/scott/.cache/torch/hub/facebookresearch_dinov2_main


Weights have been loaded


# Output from provided multiHMR model

Input image from the AGORA dataset: https://agora.is.tue.mpg.de/download.php

In [60]:
# Side length of the square input used by the model.
img_size = model.img_size

# Camera matrix: 60-degree field of view; with no explicit principal point,
# get_camera_parameters falls back to the image centre.
p_x, p_y = None, None
K = get_camera_parameters(img_size, fov=60, p_x=p_x, p_y=p_y)

# Load the AGORA frame: `x` is the model input, `img_pil_nopad` the white-padded copy for display.
x, img_pil_nopad = open_image(
    "./AGORA/train_0/ag_trainset_renderpeople_bfh_archviz_5_10_cam02_00000_1280x720.png",
    img_size,
)

# One forward pass; `humans` holds one dict per detected person.
humans = forward_model(model, x, K, det_thresh=0.3, nms_kernel_size=1)

print(len(humans))
print(humans)

7
[{'scores': tensor(0.6665, device='cuda:0', dtype=torch.float16), 'loc': tensor([531.4258, 272.6599], device='cuda:0'), 'transl': tensor([ 1.7921, -3.7569, 16.6450], device='cuda:0'), 'transl_pelvis': tensor([[ 1.7705, -3.2154, 16.9328]], device='cuda:0'), 'rotvec': tensor([[-2.0775e+00, -5.8167e-01,  1.9205e+00],
        [ 2.6661e-01,  4.0917e-02, -8.2433e-02],
        [-1.3242e-01, -1.7609e-01, -2.0738e-01],
        [ 8.2089e-02, -2.3760e-02, -2.9046e-02],
        [-1.2063e-01,  1.8840e-02,  1.3757e-02],
        [ 6.8199e-02, -1.7802e-01,  1.9632e-03],
        [ 2.4243e-02, -1.7958e-02,  2.7167e-03],
        [-1.4829e-03,  2.4381e-01,  3.1830e-02],
        [ 2.1374e-01, -1.4952e-01,  1.6390e-01],
        [ 4.7487e-02,  2.0317e-02, -1.5333e-02],
        [ 4.0485e-02,  3.7870e-02,  9.8753e-03],
        [ 2.5840e-03, -1.9328e-02, -3.3242e-02],
        [-4.4719e-02,  1.5031e-01,  7.7626e-02],
        [-4.6580e-02,  4.3241e-02, -2.1462e-01],
        [-3.6644e-02,  2.8445e-02,  1.3140e-0

In [61]:
# White-padded input image, as an array and as a fresh PIL image to render onto.
img_array = np.asarray(img_pil_nopad)
img_pil_visu = Image.fromarray(img_array)

# Overlay the predicted meshes, using the full `color` palette rather than a single colour.
pred_rend_array, _color = overlay_human_meshes(
    humans, K, model, img_pil_visu, unique_color=False
)

# Input and rendering side by side (concatenated along the width), saved for the markdown cell below.
_img = np.concatenate((img_array, pred_rend_array), axis=1).astype(np.uint8)
Image.fromarray(_img).save("./notebook_image.png")

![](./notebook_image.png)

# Ground Truths

In [145]:
# Size of the ground-truth pickle on disk, in bytes.
print(os.path.getsize('AGORA/SMPLX/train_0_withjv.pkl'))

# Load the ground-truth annotations.
# NOTE(review): unpickling can execute arbitrary code - only load files from a trusted source.
with open("AGORA/SMPLX/train_0_withjv.pkl", "rb") as file:
    df = pd.read_pickle(file)

# Available annotation columns and number of rows.
print(df.columns)
print(len(df))

2808879008
Index(['X', 'Y', 'Z', 'Yaw', 'imgPath', 'camYaw', 'camZ', 'camY', 'camX',
       'gender', 'gt_path_smpl', 'gt_path_smplx', 'kid', 'occlusion',
       'isValid', 'age', 'ethnicity', 'gt_joints_2d', 'gt_joints_3d',
       'gt_verts'],
      dtype='object')
1453


In [203]:
# Ground-truth annotation row of the frame that was fed to the model above.
matching_row = df[df['imgPath'] == "ag_trainset_renderpeople_bfh_archviz_5_10_cam02_00000.png"]
mathcing_row = matching_row  # original (misspelled) name, kept in case other cells rely on it
if matching_row.empty:
    # Fail with an explicit message instead of an IndexError on gt_joints_2d[0] below.
    raise ValueError("no ground-truth row found for ag_trainset_renderpeople_bfh_archviz_5_10_cam02_00000.png")

gt_joints_2d = np.array(matching_row['gt_joints_2d'])
print("humans in image:", len(gt_joints_2d[0])) # amount of humans

# Factor applied to the ground-truth pixel coordinates to adjust for image size
# (presumably 2160-pixel-high annotations vs. the 720-pixel-high image - TODO confirm).
gt_scale = 720.0 / 2160.0

for i, joints in enumerate(gt_joints_2d[0]):
    print(f"GT 2d-joints for human {i}:\n", joints[:5] * gt_scale) # adjust for image size
    print("total number of joints:", len(joints))
    print("\n")

print("\n\n")

# NOTE(review): the predictions were computed on the image resized and padded to
# img_size, so these coordinates are presumably not in the same pixel frame as the
# ground truth printed above - confirm before comparing values directly.
for i, human in enumerate(humans):
    pred_joints = human['j2d_smplx']
    print(f"predicted 2d-joints for human {i}:\n", pred_joints[:5])
    # Count the joints themselves; len(human) would count the keys of the per-person dict.
    print("total number of joints:", len(pred_joints))
    print("\n")

humans in image: 5
GT 2d-joints for human 0:
 [[750.87976073 188.97040134]
 [753.05269932 201.15031284]
 [749.22728334 199.11977521]
 [756.88105914 176.83307534]
 [759.27979191 243.86683681]]
total number of joints: 127


GT 2d-joints for human 1:
 [[1032.1083049   289.63733505]
 [1036.61105265  297.81957183]
 [1022.7584728   300.84807069]
 [1030.58494785  278.02774973]
 [1063.1310532   322.84509742]]
total number of joints: 127


GT 2d-joints for human 2:
 [[1213.96841734  193.73010062]
 [1203.10909572  201.83791534]
 [1216.00328656  205.25786795]
 [1215.04774422  183.69883591]
 [1192.09509946  237.86194236]]
total number of joints: 127


GT 2d-joints for human 3:
 [[958.68182482 368.79645137]
 [950.71889453 375.85076155]
 [968.75400521 382.63737246]
 [952.50504615 364.82353424]
 [951.4528464  356.37034812]]
total number of joints: 127


GT 2d-joints for human 4:
 [[1190.47329667  212.96785198]
 [1176.72980018  222.70570667]
 [1197.02374788  226.07969626]
 [1193.31300684  202.6012603 