# Live 3D Human Pose Estimation with OpenVINO
This notebook demonstrates live 3D human pose estimation with OpenVINO on a video stream, such as a webcam feed or a video file. It uses the [human-pose-estimation-3d-0001](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/human-pose-estimation-3d-0001) model from [Open Model Zoo](https://github.com/openvinotoolkit/open_model_zoo/).

##### Specify where to run inference, the model zoo URL, the name of the 3D pose estimation model, and the video source

In [None]:
# hw_location: where you want to run inference
#     "@cloud" to use DeGirum cloud
#     "@local" to run on local machine
#     IP address for AI server inference
# model_zoo_url: url/path for model zoo
#     cloud_zoo_url: valid for @cloud, @local, and ai server inference options
#     '': ai server serving models from local folder
#     path to json file: single model zoo in case of @local inference
# pose_3d_model_name: name of the 3D pose estimation model to load from the model zoo
# video_source: video source for inference, passed to degirum_tools.open_video_stream
#     URL of a video file (as used below)
#     NOTE(review): other source kinds (e.g. local file path, camera index) are
#     presumably accepted as well — confirm against the degirum_tools documentation

hw_location = "@local"
model_zoo_url = "https://cs.degirum.com/degirum/openvino_demos"
pose_3d_model_name = "human_pose_estimation--256x448_float_openvino_cpu_1"
video_source = "https://github.com/intel-iot-devkit/sample-videos/raw/master/face-demographics-walking.mp4"


#### Imports

In [None]:
import degirum as dg,degirum_tools
import numpy as np
import sys, cv2, time
import ipywidgets as widgets
import numpy as np
from IPython.display import display
sys.path.append("./engine")
import engine.engine3js as engine
from engine.parse_poses import parse_poses
from pose_3d_preprocessor import Pose3DPreprocessor

In [None]:
# Connect to the inference target given by hw_location and the model zoo at
# model_zoo_url, authenticating with the token returned by degirum_tools.get_token().
pose_3d_zoo = dg.connect(hw_location, model_zoo_url, degirum_tools.get_token())
# Load the 3D pose estimation model by name from the connected zoo.
pose_3d_model = pose_3d_zoo.load_model(pose_3d_model_name)

In [None]:
# 3D edge index array: every row is a pair of joint indices joined by one limb.
# The edges are written as chains of consecutive joints and expanded into pairs;
# the joint labels are the ones given in the original per-edge comments.
body_edges = np.array(
    [
        [start, end]
        for chain in (
            (0, 1),
            (0, 9, 10, 11),  # neck - r_shoulder - r_elbow - r_wrist
            (0, 3, 4, 5),  # neck - l_shoulder - l_elbow - l_wrist
            (1, 15, 16),  # nose - l_eye - l_ear
            (1, 17, 18),  # nose - r_eye - r_ear
            (0, 6, 7, 8),  # neck - l_hip - l_knee - l_ankle
            (0, 12, 13, 14),  # neck - r_hip - r_knee - r_ankle
        )
        for start, end in zip(chain, chain[1:])
    ]
)


# 2D edge index array: every row is a pair of keypoint indices joined by one limb.
# The edges are written as chains of consecutive keypoints and expanded into pairs;
# the keypoint labels are the ones given in the original per-edge comments.
body_edges_2d = np.array(
    [
        [start, end]
        for chain in (
            (0, 1),  # neck - nose
            (1, 16, 18),  # nose - l_eye - l_ear
            (1, 15, 17),  # nose - r_eye - r_ear
            (0, 3, 4, 5),  # neck - l_shoulder - l_elbow - l_wrist
            (0, 9, 10, 11),  # neck - r_shoulder - r_elbow - r_wrist
            (0, 6, 7, 8),  # neck - l_hip - l_knee - l_ankle
            (0, 12, 13, 14),  # neck - r_hip - r_knee - r_ankle
        )
        for start, end in zip(chain, chain[1:])
    ]
)


def draw_poses(frame, poses_2d, scaled_frame, use_popup):
    """
    Overlay the estimated 2D poses on ``frame`` (the image is modified in place).

    Limbs are drawn as lines between the keypoint pairs listed in
    ``body_edges_2d``; keypoints are drawn as filled circles.

    :param frame: image to draw on
    :param poses_2d: iterable of flat per-person vectors; everything except the
        last element of each vector is read as consecutive (x, y, score) triples
    :param scaled_frame: image whose width/height the keypoint coordinates refer
        to; used to rescale the coordinates to the size of ``frame``
    :param use_popup: not used by this function
    :return: ``frame`` with the overlays drawn
    """
    limb_color = (255, 255, 0)
    joint_color = (0, 255, 255)

    for person in poses_2d:
        # Rows of ``keypoints``: 0 = x, 1 = y, 2 = score; one column per keypoint.
        keypoints = np.array(person[:-1]).reshape((-1, 3)).T
        detected = keypoints[2] > 0

        # Map coordinates from the scaled image back onto the frame being drawn.
        scaled_x = keypoints[0] * frame.shape[1] / scaled_frame.shape[1]
        scaled_y = keypoints[1] * frame.shape[0] / scaled_frame.shape[0]
        keypoints[0] = scaled_x
        keypoints[1] = scaled_y
        pixel_xy = keypoints[0:2].astype(np.int32)

        # Draw limbs: only when both end keypoints have a positive score.
        for start, end in body_edges_2d:
            if not (detected[start] and detected[end]):
                continue
            cv2.line(
                frame,
                tuple(pixel_xy[:, start]),
                tuple(pixel_xy[:, end]),
                limb_color,
                4,
                cv2.LINE_AA,
            )

        # Draw joints: every keypoint whose score is not the -1 marker.
        for kpt_id in np.flatnonzero(keypoints[2] != -1):
            cv2.circle(
                frame,
                tuple(pixel_xy[:, kpt_id]),
                3,
                joint_color,
                -1,
                cv2.LINE_AA,
            )

    return frame

In [None]:
# Read the network input height and width from the loaded model's metadata.
# NOTE(review): the height is taken from InputW and the width from InputC rather
# than from InputH/InputW. Presumably the model info fields are shifted relative
# to the model's actual input layout (the model name suggests 256x448) — confirm
# against the model before changing this.
inp_H, inp_W = pose_3d_model.model_info.InputW[0],pose_3d_model.model_info.InputC[0]
# Echo the values as the cell output.
inp_H, inp_W

In [None]:
# Run 3D pose estimation on the video: for every frame, run the model, show the
# 3D skeletons in the three.js viewer and the 2D overlay next to it (or in an
# OpenCV popup window when use_popup is True).
num_frames = 100  # Maximum number of frames to read from video, set to 0 for all frames.
use_popup = False  # True: show frames in an OpenCV window; False: show them inline
title = "Press ESC to Exit"  # popup window title, only used when use_popup is True
focal_length = -1  # negative means "unknown"; derived from the first scaled frame
stride = 8  # passed through to parse_poses
skeleton_set = None  # objects currently added to the 3D scene, None when there are none

# The preprocessor arguments never change, so build it once instead of per frame.
preprocessor = Pose3DPreprocessor(inp_H, inp_W)

with degirum_tools.open_video_stream(video_source) as video_stream:
    if num_frames == 0:
        # OpenCV returns stream properties as floats; the frame limit and the
        # progress total are frame counts, so convert to int.
        total_frames = int(video_stream.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        total_frames = num_frames
    progress = degirum_tools.Progress(total_frames)

    if use_popup:
        # Create the window once; a second namedWindow call with the same name
        # has no effect, so only this set of flags ever applied.
        cv2.namedWindow(title, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE)

    try:
        for i, frame in enumerate(degirum_tools.video_source(video_stream)):
            if i == 0:
                # Size the notebook widgets to a 450-pixel-wide view that keeps
                # the aspect ratio of the video frames.
                resize_scale = 450 / frame.shape[1]
                windows_width = int(frame.shape[1] * resize_scale)
                windows_height = int(frame.shape[0] * resize_scale)

                # use visualization library
                engine3D = engine.Engine3js(
                    grid=True, axis=True, view_width=windows_width, view_height=windows_height
                )

                if use_popup:
                    # display the 3D human pose in this notebook, and origin frame in popup window
                    display(engine3D.renderer)
                else:
                    # set the 2D image box, show both human pose and image in the notebook
                    imgbox = widgets.Image(
                        format="jpg", height=windows_height, width=windows_width
                    )
                    display(widgets.HBox([engine3D.renderer, imgbox]))

                skeleton = engine.Skeleton(body_edges=body_edges)

            if i == total_frames:
                break

            resized_frame, scaled_frame = preprocessor.load_frame(frame)
            if focal_length < 0:  # Focal length is unknown
                focal_length = np.float32(0.8 * scaled_frame.shape[1])

            # Run inference and collect the first three output tensors of the model.
            res = pose_3d_model(scaled_frame)
            inference_result = tuple(res.results[k]["data"][0] for k in range(3))
            # Process the point to point coordinates of the data
            poses_3d, poses_2d = parse_poses(
                inference_result, 1, stride, focal_length, True
            )

            # Drop the skeletons of the previous frame, if any are in the scene.
            if skeleton_set is not None:
                engine3D.scene_remove(skeleton_set)
                skeleton_set = None

            # poses_3d is indexed as an array below, so test its length explicitly.
            if len(poses_3d) > 0:
                # Remap the pose axes for display in the 3D viewer. Each joint
                # occupies four consecutive values, of which the first three are
                # x, y, z. The new axes are (200 - z, 100 - y, -x).
                # NOTE(review): the 200/100 offsets presumably position the
                # skeleton inside the viewer grid — confirm.
                poses_3d_copy = poses_3d.copy()
                x = poses_3d_copy[:, 0::4]
                y = poses_3d_copy[:, 1::4]
                z = poses_3d_copy[:, 2::4]
                poses_3d[:, 0::4] = 200 - z
                poses_3d[:, 1::4] = 100 - y
                poses_3d[:, 2::4] = -x

                # Keep only the three coordinates of each of the 19 joints.
                poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
                people = skeleton(poses_3d=poses_3d)

                engine3D.scene_add(people)
                skeleton_set = people

                # draw 2D
                frame = draw_poses(frame, poses_2d, resized_frame, use_popup)

            if use_popup:
                cv2.imshow(title, frame)
                key = cv2.waitKey(1)
                # escape = 27, use ESC to exit
                if key == 27:
                    break
            else:
                # encode numpy array to jpg
                imgbox.value = cv2.imencode(
                    ".jpg",
                    frame,
                    params=[cv2.IMWRITE_JPEG_QUALITY, 90],
                )[1].tobytes()

            engine3D.renderer.render(engine3D.scene, engine3D.cam)
            progress.step()
    finally:
        # Close the popup window even when the loop stops early or raises.
        if use_popup:
            cv2.destroyAllWindows()
