# ORPose-Depth
**Human Pose Estimation on Privacy-Preserving Low-Resolution Depth Images (MICCAI-2019)**

_Vinkle Srivastav, Afshin Gangi, Nicolas Padoy_

[![arXiv](https://img.shields.io/badge/arxiv-2007.08340-red)](https://arxiv.org/abs/2007.08340) 

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/CAMMA-public/ORPose-Depth/blob/master/orpose_depth_demo.ipynb)

------------------------------

**This demo notebook contains the inference and evaluation scripts for the following models: DepthPose_80x60 and DepthPose_64x48**

**The code below is needed only for the Colab demo**

In [None]:
# Download the code, models and the data
!git clone https://github.com/CAMMA-public/ORPose-Depth.git
%cd ORPose-Depth

!wget wget https://s3.unistra.fr/camma_public/github/DepthPose/models.zip
!wget wget https://s3.unistra.fr/camma_public/github/DepthPose/data.zip
!unzip models.zip
!unzip data.zip
!rm models.zip data.zip
!pip install yacs

# Build the NMS library
%cd lib
!make
%cd ..

## Imports

In [None]:
# imports
import os, sys
import subprocess
import glob
import torch
import torchvision
import random
import json
from tqdm import tqdm
from PIL import Image

# to run the video
from IPython.display import HTML, display
from base64 import b64encode

import cv2
import numpy as np
lib_dir = os.path.join(os.getcwd(),"lib") 
if lib_dir not in sys.path:
    sys.path.insert(0, lib_dir)

# add the local library code
from core.inference import get_poses
from models.depthpose_x8 import get_model
from dataset.mvor import MVORDatasetTest
import matplotlib.pyplot as plt
from utils.vis_utils import VisUtils

## Choose the model type and set the paths to the data and model directory

In [None]:
# choose model type: "x10" (DepthPose_64x48 weights) or "x8" (DepthPose_80x60 weights)
MODEL_TYPE = "x10"

# paths and parameters
DATA_DIR = "data"
MODELS_DIR = "models"
# Thresholds forwarded to get_poses(); a "scale" entry is added below per model
PARAMS = {"thre1": 0.1, "thre2": 0.05, "thre3": 0.5}
USE_GPU = True

# Use CUDA only when it is both requested and available; otherwise run on the CPU
if torch.cuda.is_available() and USE_GPU:
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

print("Using device:", DEVICE)

COLOR_IMAGES_DIR = os.path.join(DATA_DIR, "images/MVOR/LR_x5_color")

# Per-model settings: weights file (inside MODELS_DIR), depth image folder
# (inside DATA_DIR) and the value stored in PARAMS["scale"].
MODEL_CONFIGS = {
    "x8": {"weights": "depthpose_80x60.pth", "depth_dir": "images/MVOR/LR_x8", "scale": 8},
    "x10": {"weights": "depthpose_64x48.pth", "depth_dir": "images/MVOR/LR_x10", "scale": 10},
}

# Fail immediately on an unknown model type instead of leaving MODEL_PATH and
# DEPTH_IMAGES_DIR undefined (or stale from a previous run) for later cells.
if MODEL_TYPE not in MODEL_CONFIGS:
    raise ValueError(
        "please select proper model! MODEL_TYPE must be one of {}, got {!r}".format(
            sorted(MODEL_CONFIGS), MODEL_TYPE
        )
    )

MODEL_PATH = os.path.join(MODELS_DIR, MODEL_CONFIGS[MODEL_TYPE]["weights"])
DEPTH_IMAGES_DIR = os.path.join(DATA_DIR, MODEL_CONFIGS[MODEL_TYPE]["depth_dir"])
PARAMS["scale"] = MODEL_CONFIGS[MODEL_TYPE]["scale"]

## Helper functions

In [None]:
def get_files():
    """ Read the color and depth paths
    The file names are taken from the "images" entries of
    annotations/mvor_eval_depth_2018.json inside DATA_DIR. Each depth path is
    rooted at DEPTH_IMAGES_DIR; the matching color path is rooted at
    COLOR_IMAGES_DIR with every "depth" in the file name replaced by "color".
    Returns:
        list of (depth_path, color_path) tuples
    """
    annotation_path = os.path.join(DATA_DIR, "annotations/mvor_eval_depth_2018.json")
    # Context manager so the annotation file handle is closed deterministically
    with open(annotation_path) as annotation_file:
        image_entries = json.load(annotation_file)["images"]

    files = [
        (
            os.path.join(DEPTH_IMAGES_DIR, entry["file_name"]),
            os.path.join(COLOR_IMAGES_DIR, entry["file_name"].replace("depth", "color")),
        )
        for entry in image_entries
    ]
    print("Found {} depth and color files".format(len(files)))
    return files

def progress_bar(value, max=100):
    """ A HTML helper function to display the progress bar
    Args:
        value ([int]): [current progress bar value]
        max (int, optional): [maximum value]. Defaults to 100.
    Returns:
        [IPython.display.HTML]: [HTML object wrapping a <progress> element]
    """
    # Attributes are separated by whitespace only; a comma after the max
    # attribute would be parsed as a bogus extra attribute.
    return HTML("""
        <progress
            value='{value}'
            max='{max}'
            style='width: 100%'
        >
            {value}
        </progress>
    """.format(value=value, max=max))

def bgr2rgb(im):
    """[swap the channel order of an opencv image from BGR to RGB]
    Args:
        im ([numpy.ndarray]): [three-channel input image in BGR order]
    Returns:
        [numpy.ndarray]: [image with the same channels merged in RGB order]
    """
    # Unpacking into exactly three names keeps the three-channel requirement
    blue, green, red = cv2.split(im)
    rgb_channels = [red, green, blue]
    return cv2.merge(rgb_channels)

def add_border(im, bordersize=25, mean=0):
    """
    [pad the image with a constant-valued border of equal width on all four sides]
    Args:
        im ([numpy.ndarray]): [input image]
        bordersize (int, optional): [border width in pixels on each side]. Defaults to 25.
        mean (int, optional): [value used for each of the three border channels]. Defaults to 0.
    Returns:
        [numpy.ndarray]: [output image]
    """
    # Same width on every side
    padding = {side: bordersize for side in ("top", "bottom", "left", "right")}
    fill_value = [mean] * 3
    return cv2.copyMakeBorder(
        im,
        borderType=cv2.BORDER_CONSTANT,
        value=fill_value,
        **padding,
    )


def images_to_video(img_folder, output_vid_file, fps=20):
    """[convert png images to video using ffmpeg]
    Frames are read from img_folder using the pattern %06d.png and encoded
    with libx264 (yuv420p, baseline profile, no audio). An existing output
    file is overwritten (-y).
    Args:
        img_folder ([str]): [path to images]
        output_vid_file ([str]): [Name of the output video file name]
        fps (int, optional): [input frame rate passed to ffmpeg]. Defaults to 20.
    """
    os.makedirs(img_folder, exist_ok=True)
    command = [
        "ffmpeg",
        "-y",
        "-framerate",
        str(fps),
        "-threads",
        "16",
        "-i",
        f"{img_folder}/%06d.png",
        "-profile:v",
        "baseline",
        "-level",
        "3.0",
        "-c:v",
        "libx264",
        "-pix_fmt",
        "yuv420p",
        "-an",
        "-v",
        "error",
        output_vid_file,
    ]
    print(f'\nRunning "{" ".join(command)}"')
    # Check the exit status so a failed encode is not reported as a success
    return_code = subprocess.call(command)
    if return_code == 0:
        print("\nVideo generation finished")
    else:
        print(f"\nVideo generation failed (ffmpeg exit code {return_code})")

# get the files
# Builds the list of (depth_path, color_path) pairs from the annotation file;
# the single-image demo cell samples from this list with random.choice.
files = get_files()

## Get the model and load the weights

In [None]:
# Build the network and load the pretrained weights selected by MODEL_PATH
model = get_model()
# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved
# from CUDA tensors still loads when the notebook falls back to the CPU.
# strict=True requires the checkpoint keys to match the model exactly.
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE), strict=True)
model = model.to(DEVICE)
# print(model)  # uncomment to inspect the architecture

## Inference on the low-resolution depth image

In [None]:
# Rendering helper; the video demo cells further down reuse this object
vis_utils = VisUtils()

# Sample one (depth, color) path pair at random
path_depth, path_color = random.choice(files)

# Load the depth frame as a tensor, add a leading batch dimension and move it to DEVICE
depth_tensor = torchvision.transforms.functional.to_tensor(Image.open(path_depth))
depth_batch = depth_tensor.unsqueeze_(0).to(DEVICE)
print("size of the input depth image: {}x{}".format(depth_batch.shape[-1], depth_batch.shape[-2]))

# Load the matching color frame, convert BGR -> RGB and resize it to the renderer's output size
color_bgr = cv2.imread(path_color)
color_rgb = cv2.resize(bgr2rgb(color_bgr), (vis_utils.width, vis_utils.height))

# Run the pose estimation on the depth frame
keypoints = get_poses(model, depth_batch, PARAMS)

# Draw the predicted keypoints on top of the depth frame
depth_array = depth_batch.cpu().squeeze().numpy()
depth_rendered = vis_utils.render(depth_array, keypoints)

# Show the rendered depth frame next to its color counterpart
fig = plt.figure(figsize=(20, 10))
panels = [(depth_rendered, "Output depth"), (color_rgb, "Corresponding color image")]
for position, (panel_image, panel_title) in enumerate(panels, start=1):
    fig.add_subplot(1, 2, position)
    plt.imshow(panel_image)
    plt.title(panel_title)
    plt.axis("off")
plt.show()

# Re-run this cell to see the result on another random image

## Inference demo on the video frames

### Set the paths for the video frames

In [None]:
# Set the paths (rooted at DATA_DIR so they follow the configuration cell)
IMGS_PATH_DEPTH = os.path.join(DATA_DIR, "images/MVOR_seq/LR_x10")
IMGS_PATH_COLOR = os.path.join(DATA_DIR, "images/MVOR_seq/LR_x10_color")


# Name of the output video file; the rendered frames are written to the same directory
OUTPUT_DIR = "output"
OUTPUT_VID_NAME = os.path.join(OUTPUT_DIR, "output.mp4")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Read the paths; sorting both lists pairs depth and color frames by position
files_depth = sorted(glob.glob(IMGS_PATH_DEPTH + "/*.png"))
files_color = sorted(glob.glob(IMGS_PATH_COLOR + "/*.png"))

# zip() silently stops at the shorter list, so report a mismatch explicitly
if len(files_depth) != len(files_color):
    print(
        "Warning: found {} depth frames but {} color frames; only the first {} pairs are used".format(
            len(files_depth), len(files_color), min(len(files_depth), len(files_color))
        )
    )

# NOTE: this rebinds `files`, replacing the list returned by get_files()
files = list(zip(files_depth, files_color))

## Inference and rendering

In [None]:
print("Running inference on video frames")
# Initialize the progress-bar at 0: no frame has been processed yet
out = display(progress_bar(0, len(files)), display_id=True)

# Caption style shared by both panels
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 0.5
color = (255, 255, 255) # white
thickness = 1
# Caption anchor points passed to cv2.putText
org_depth = (200, 25)
org = (250, 25)

# Run the inference on the low-resolution depth frames
for index, (path_depth, path_color) in enumerate(files):
    # Depth frame -> batched tensor on DEVICE
    im_depth = torchvision.transforms.functional.to_tensor(Image.open(path_depth)).unsqueeze_(0)
    im_depth = im_depth.to(DEVICE)

    # Color frame: shrink to 80x60 first, then enlarge to the renderer's output size
    im_color = cv2.imread(path_color)
    im_color = cv2.resize(im_color, (80, 60))
    im_color = cv2.resize(im_color, (vis_utils.width, vis_utils.height))

    keypoints = get_poses(model, im_depth, PARAMS)
    im_depth = im_depth.cpu().squeeze().numpy()

    # Render the keypoints, frame both panels and caption them inside the top border
    im_depth = vis_utils.render(im_depth, keypoints, bgr2rgb=False)
    im_depth = add_border(im_depth, bordersize=35)
    im_color = add_border(im_color, bordersize=35)
    im_depth = cv2.putText(im_depth, "Output on low-resolution depth images", org_depth, font, fontScale, color, thickness, cv2.LINE_AA)
    im_color = cv2.putText(im_color, "Corresponding color images", org, font, fontScale, color, thickness, cv2.LINE_AA)

    # Side-by-side frame, saved under the zero-padded name that images_to_video expects
    vis = np.concatenate((im_depth, im_color), axis=1)
    cv2.imwrite(os.path.join(OUTPUT_DIR, f"{index:06d}" + ".png"), vis)
    out.update(progress_bar(index + 1, len(files)))

# Convert the rendered images to video
images_to_video(OUTPUT_DIR, OUTPUT_VID_NAME, fps=10)

## Show the output video

In [None]:
# Read the encoded video; the context manager closes the file handle right away
with open(OUTPUT_VID_NAME, "rb") as video_file:
    mp4 = video_file.read()

# Embed the video bytes as a base64 data URL so the player works without a file server
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML(""" <video width=800 controls>
         <source src="%s" type="video/mp4">
         </video> 
     """ % data_url
    )

### Evaluation code for **DepthPose_64x48** and **DepthPose_80x60** on the MVOR dataset. 

**Each model takes approximately 40 minutes for the evaluation**

In [None]:
# To run the evaluation for DepthPose_64x48 model
# Runs tools/eval_mvor.py as a shell command with the DepthPose_64x48 experiment config.
!python tools/eval_mvor.py --config_file experiments/mvor/DepthPose_64x48.yaml                     

In [None]:
# To run the evaluation for DepthPose_80x60 model
# Runs tools/eval_mvor.py as a shell command with the DepthPose_80x60 experiment config.
!python tools/eval_mvor.py --config_file experiments/mvor/DepthPose_80x60.yaml