### [Hand] Ego Pose Visualization

In this notebook you can visualize the hand ego pose annotations. Please make sure you have followed the instructions in README.md to obtain the ground truth JSON file and the undistorted Aria images. The following directories are used throughout:

- `<egoexo_output_dir>`: Directory of the data downloaded by Ego-Exo4D Downloader.
- `<gt_output_dir>`: Output directory of hand ego pose data preparation script.

#### Setup

In [None]:
import cv2
import json
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from utils.utils import get_ego_aria_cam_name, cam_to_img

In [None]:
# --- Data locations (edit to match your machine) ---
# Directory of the data downloaded by the Ego-Exo4D Downloader
egoexo_output_dir = "/media/Ego4D/Volume2/egoexo-challenge" # MODIFY
# Output directory of the hand ego pose data preparation script
gt_output_dir = "/media/Ego4D/Volume2/egoexo-challenge/egohand" # MODIFY

# --- What to visualize ---
split = "train"  # MODIFY
anno_type = "manual"    # MODIFY
orientation = "landscape" # MODIFY

# Fail fast on a typo in any of the options above
assert split in ("train", "val", "test"), f"Invalid split: {split}"
assert anno_type in ("manual", "auto"), f"Invalid anno_type: {anno_type}"
assert orientation in ("landscape", "portrait"), f"Invalid orientation: {orientation}"

In [None]:
# Load the ground truth annotation of the selected split.
# train/val annotation files carry a "_public" suffix, all other splits "_private".
if split in ["train", "val"]:
    gt_anno_path = os.path.join(gt_output_dir, f"annotation/{anno_type}/ego_pose_gt_anno_{split}_public.json")
else:
    gt_anno_path = os.path.join(gt_output_dir, f"annotation/{anno_type}/ego_pose_gt_anno_{split}_private.json")
assert os.path.exists(gt_anno_path), f"{gt_anno_path} doesn't exist"
# Use a context manager so the file handle is closed as soon as the JSON is parsed
with open(gt_anno_path) as gt_anno_file:
    gt_anno = json.load(gt_anno_file)

# Load in takes metadata
with open(os.path.join(egoexo_output_dir, "takes.json")) as takes_file:
    takes = json.load(takes_file)

# camera pose and parameters
cam_pose_dir = os.path.join(egoexo_output_dir, f"annotations/ego_pose/{split}/camera_pose")

# Undistorted Aria image directory
img_view_prefix = "image_portrait_view" if orientation == "portrait" else "image"
img_dir = os.path.join(gt_output_dir, f"{img_view_prefix}/undistorted/{split}")

# take uid dict: only takes whose uid appears in the loaded annotation are kept
take_to_uid = {each_take['take_name'] : each_take['take_uid'] for each_take in takes if each_take["take_uid"] in gt_anno}
uid_to_take = {uid:take for take, uid in take_to_uid.items()}

# Helper function for visualization
def vis_2d_bbox(img, two_hands_bbox, take_name, frame_idx, label=""):
    """Show an image in a new 5x5 figure and outline each hand's bounding box in red.

    Args:
        img: Image handed to ``plt.imshow``.
        two_hands_bbox: Dict with exactly two entries, one per hand. Each value
            is either empty (that hand is skipped) or a sequence read as
            ``[x0, y0, x1, y1]`` (presumably x_min, y_min, x_max, y_max in
            pixels -- confirm against the annotation format).
        take_name: Take name shown in the figure title.
        frame_idx: Frame index shown in the figure title.
        label: Optional prefix for the figure title.

    Raises:
        AssertionError: If ``two_hands_bbox`` is not a dict of length two.
    """
    assert isinstance(two_hands_bbox, dict) and len(two_hands_bbox) == 2

    plt.figure(figsize=(5,5))
    plt.imshow(img)
    for hand_bbox in two_hands_bbox.values():
        if len(hand_bbox) == 0:
            # No box for this hand
            continue
        x0, y0, x1, y1 = (hand_bbox[k] for k in range(4))
        # The four sides of the rectangle, each drawn as its own segment
        box_edges = (
            ([x0, x1], [y0, y0]),
            ([x0, x0], [y0, y1]),
            ([x0, x1], [y1, y1]),
            ([x1, x1], [y0, y1]),
        )
        for edge_xs, edge_ys in box_edges:
            plt.plot(edge_xs, edge_ys, 'r')
    plt.title(f"{label} {take_name} - frame_idx={frame_idx}", fontsize=10)
    plt.axis("off")

def vis_2d_hand_pose(img, two_hand_kpts_2d, take_name, frame_idx, label=""):
    """Show an image in a new 5x5 figure and overlay the 2D keypoints of each hand.

    Each hand is drawn as five polylines, one per finger chain, every chain
    starting at keypoint 0 and using its own colour.

    Args:
        img: Image handed to ``plt.imshow``.
        two_hand_kpts_2d: Dict with exactly two entries, one per hand. Each
            value is either empty (that hand is skipped) or an array that
            supports NumPy integer-array indexing over keypoints 0..20, with
            x in column 0 and y in column 1.
        take_name: Take name shown in the figure title.
        frame_idx: Frame index shown in the figure title.
        label: Optional prefix for the figure title.

    Raises:
        AssertionError: If ``two_hand_kpts_2d`` is not a dict of length two.
    """
    # Keypoint indices that make up each of the five finger chains
    finger_chains = np.array([[0,1,2,3,4],
                              [0,5,6,7,8],
                              [0,9,10,11,12],
                              [0,13,14,15,16],
                              [0,17,18,19,20]])
    # One colour per chain, in the same order as finger_chains
    finger_colors = ('tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple')
    assert isinstance(two_hand_kpts_2d, dict) and len(two_hand_kpts_2d) == 2

    plt.figure(figsize=(5,5))
    plt.imshow(img)
    for hand_kpts in two_hand_kpts_2d.values():
        if len(hand_kpts) == 0:
            # No keypoints for this hand
            continue
        for chain, chain_color in zip(finger_chains, finger_colors):
            chain_pts = hand_kpts[chain]
            plt.plot(chain_pts[:,0], chain_pts[:,1], marker='o', markersize=2, color=chain_color)
    plt.title(f"{label} {take_name} - frame_idx={frame_idx}", fontsize=10)
    plt.axis("off")


def vis_3d_hand_pose(two_hand_kpts_3d):
    """Plot the 3D keypoints of both hands side by side (left hand first).

    A figure with two 3D subplots is created. For every non-empty hand, each of
    the five finger chains (all starting at keypoint 0) is drawn as scatter
    points plus a connecting line in its own colour, and the axes are labelled
    X/Y/Z. An empty hand leaves its subplot with only the title.

    Args:
        two_hand_kpts_3d: Dict of length two with the keys ``"left"`` and
            ``"right"``. Each value is either empty or an array that supports
            NumPy integer-array indexing over keypoints 0..20, with x, y, z in
            columns 0, 1, 2.

    Raises:
        AssertionError: If ``two_hand_kpts_3d`` is not a dict of length two.
    """
    # Keypoint indices that make up each of the five finger chains
    finger_chains = np.array([[0,1,2,3,4],
                              [0,5,6,7,8],
                              [0,9,10,11,12],
                              [0,13,14,15,16],
                              [0,17,18,19,20]])
    # One colour per chain, in the same order as finger_chains
    finger_colors = ('tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple')
    assert isinstance(two_hand_kpts_3d, dict) and len(two_hand_kpts_3d) == 2

    fig = plt.figure(figsize=plt.figaspect(0.5))
    for subplot_pos, hand_order in enumerate(("left", "right"), start=1):
        hand_kpts = two_hand_kpts_3d[hand_order]
        ax = fig.add_subplot(1, 2, subplot_pos, projection='3d')
        ax.set_title(f"3D plot - {hand_order} hand")
        if len(hand_kpts) > 0:
            for chain, chain_color in zip(finger_chains, finger_colors):
                chain_pts = hand_kpts[chain]
                xs, ys, zs = chain_pts[:,0], chain_pts[:,1], chain_pts[:,2]
                ax.scatter(xs, ys, zs, color=chain_color)
                ax.plot3D(xs, ys, zs, color=chain_color)
            ax.set_xlabel("X")
            ax.set_ylabel("Y")
            ax.set_zlabel("Z")
        ax.set_aspect('equal')

#### Select takes

In [None]:
# List every annotated take of the chosen split as "<take name> <take uid>"
print(f"Found {len(gt_anno)} takes from {split} split:")
for take_uid in gt_anno.keys():
    take_name = uid_to_take[take_uid]
    print(f"{take_name:25s} {take_uid}")

In [None]:
vis_take_name = "upenn_0717_Piano_1_2" # MODIFY
# Give a readable error instead of a bare KeyError when the take name is mistyped
# or has no annotation in the loaded file
assert vis_take_name in take_to_uid, f"{vis_take_name} is not an annotated take of the {split} split"

# Get selected take's annotation
curr_uid = take_to_uid[vis_take_name]
vis_take_anno = gt_anno[curr_uid]
print(f"Found {len(vis_take_anno)} images from {vis_take_name} with annotated frame number:")
# Print all annotated frames
print(list(vis_take_anno.keys()))

# Load aria cam name and cam pose
# First metadata entry whose name matches the selected take
take = next(t for t in takes if t['take_name'] == vis_take_name)
aria_name = get_ego_aria_cam_name(take)
cam_pose_path = os.path.join(cam_pose_dir, f"{curr_uid}.json")
assert os.path.exists(cam_pose_path), f"{cam_pose_path} doesn't exist"
# Use a context manager so the file handle is closed as soon as the JSON is parsed
with open(cam_pose_path) as cam_pose_file:
    curr_cam_pose = json.load(cam_pose_file)

#### Select frame_idx and visualize

Pick one annotated frame and visualize its hand bounding boxes, its 2D annotation, its 3D annotation, and the 3D annotation projected back onto the image.

In [None]:
vis_frame_idx = 547 # MODIFY
# The annotation dict is keyed by the frame index as a string
vis_frame_idx = str(vis_frame_idx)
assert vis_frame_idx in vis_take_anno, f"Frame {vis_frame_idx} is not annotated in {vis_take_name}"
frame_anno = vis_take_anno[vis_frame_idx]

# Image (file name is the frame index zero-padded to six digits)
# The context manager closes the image file once the pixels are copied into the array
with Image.open(os.path.join(img_dir, vis_take_name, f"{int(vis_frame_idx):06d}.jpg")) as pil_img:
    img = np.array(pil_img)

## hand bounding box visualization
bbox_right = np.array(frame_anno['right_hand_bbox']).astype(np.float32)
bbox_left = np.array(frame_anno['left_hand_bbox']).astype(np.float32)
two_hands_bbox = {"right": bbox_right, "left": bbox_left}
vis_2d_bbox(img, two_hands_bbox, vis_take_name, vis_frame_idx, "bbox")

## 2D annotation visualization
kpts_2d_right = np.array(frame_anno['right_hand_2d']).astype(np.float32)
kpts_2d_left = np.array(frame_anno['left_hand_2d']).astype(np.float32)
two_hands_kpts_2d = {"right": kpts_2d_right, "left": kpts_2d_left}
vis_2d_hand_pose(img, two_hands_kpts_2d, vis_take_name, vis_frame_idx, "2D GT")

## 3D annotation visualization
kpts_3d_right = np.array(frame_anno['right_hand_3d']).astype(np.float32)
kpts_3d_left = np.array(frame_anno['left_hand_3d']).astype(np.float32)
# rotate 3D annotations if in portrait view
if orientation == "portrait":
    # Maps every point (x, y, z) to (-y, x, z)
    R = np.array([[0,-1,0], [1,0,0], [0,0,1]])
    # A hand without annotation is an empty array of shape (0,), which cannot be
    # multiplied by the 3x3 matrix, so only rotate hands that have keypoints
    if len(kpts_3d_right) > 0:
        kpts_3d_right = (R @ kpts_3d_right.T).T
    if len(kpts_3d_left) > 0:
        kpts_3d_left = (R @ kpts_3d_left.T).T
two_hands_kpts_3d = {"right": kpts_3d_right, "left": kpts_3d_left}
vis_3d_hand_pose(two_hands_kpts_3d)

## Projected 2D annotation visualization
intrinsics = np.array(curr_cam_pose[aria_name]["camera_intrinsics"]).astype(np.float32)
# Empty hands are passed through unchanged so the plotting helper skips them
kpts_2d_right_proj = cam_to_img(kpts_3d_right, intrinsics) if len(kpts_3d_right) > 0 else kpts_3d_right
kpts_2d_left_proj = cam_to_img(kpts_3d_left, intrinsics) if len(kpts_3d_left) > 0 else kpts_3d_left
two_hands_kpts_2d_proj = {"right": kpts_2d_right_proj, "left": kpts_2d_left_proj}
vis_2d_hand_pose(img, two_hands_kpts_2d_proj, vis_take_name, vis_frame_idx, "Projected 3D GT")