In [1]:
import os
import os.path as osp
import sys
import json

import numpy as np
from transforms3d.quaternions import mat2quat, quat2mat

In [2]:
def rt_to_isaac_pose(rt):
    """
    Split a 4x4 homogeneous transform into its rotation and translation parts.

    The rotation is expressed as a scalar-first (w, x, y, z) quaternion, which
    is the convention used by isaac sim.

    NOTE: the quaternion is returned FIRST, i.e. the return order is
    (quat, trans) and not (trans, quat).

    Args:
        rt: 4x4 transform, indexable as ``rt[:3, :3]`` (rotation block) and
            ``rt[:3, 3]`` (translation column).

    Returns:
        quat: quaternion in wxyz format
        trans: translation (the first three entries of the last column of ``rt``)
    """
    rotation_block = rt[0:3, 0:3]
    translation = rt[0:3, 3]
    return mat2quat(rotation_block), translation

In [3]:
# Root directory holding one `<scene_id>_<objid>` sub-directory per scene-object
# pair (see `construct_grasp_json`, which joins `datadir` with that name).
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
datadir = "/home/ninad/Datasets/RFP_RealWorldExp/humandemo/data_captured_rfpv1"
# Example scene-object directory (scene s10, object 004) and example task directory.
# NOTE(review): neither is referenced by the cells visible below, which build their
# own paths -- presumably kept for ad-hoc debugging; confirm before removing.
scene_obj_dir = osp.join(datadir, "s10_004")
taskdir = "task_0_2s"

In [4]:
# Maps a YCB object numeric key (e.g. "003") to the list of scene ids (e.g. "s27")
# for which a `<scene_id>_<objid>` directory is processed by the driver loop.
# A scene id may appear under several objects.
OBJID_TO_SCENES = {
    "003" : ["s27", "s48"],
    "004" : ["s10", "s141"],
    "005" : ["s65", "s130"],
    "006" : ["s83", "s130"],
    "007" : ["s56", "s122"],
    "008" : ["s38", "s56"],
    "009" : ["s122", "s39"],
    "010" : ["s77", "s83"],
    "011" : ["s10", "s27"],
    "021" : ["s83", "s104"],
    "024" : ["s77", "s141"],
    "025" : ["s27", "s84"],
    "035" : ["s10", "s68"],
    "037" : ["s148", "s161"],
    "040" : ["s148", "s161"],
    "052" : ["s27", "s48"],
}

# Maps a YCB object numeric key (e.g. "003") to the full object name
# (e.g. "003_cracker_box"). The full name is stored under `object_id` in the
# grasp json and is also used in the json file name.
YCBID_MAP = {
    "003" : "003_cracker_box",
    "004" : "004_sugar_box",
    "005" : "005_tomato_soup_can",
    "006" : "006_mustard_bottle",
    "007" : "007_tuna_fish_can",
    "008" : "008_pudding_box",
    "009" : "009_gelatin_box",
    "010" : "010_potted_meat_can",
    "011" : "011_banana",
    "021" : "021_bleach_cleanser",
    "024" : "024_bowl",
    "025" : "025_mug",
    "035" : "035_power_drill",
    "037" : "037_scissors",
    "040" : "040_large_marker",
    "052" : "052_extra_large_clamp",
}

# Offset (in meters) of the fingertip link ahead of the wrist roll link when
# considering the old (original) fetch fingers.
delta_fingertip_old = 0.19645

# Homogeneous transform from the wrist roll frame to the fingertip frame:
# identity rotation with a translation of `delta_fingertip_old` along the first (x) axis.
TF_wrist_roll_to_fingertip = np.array(
    [
        [1.0, 0.0, 0.0, delta_fingertip_old],
        [0.0, 1.0, 0.0, 0.0],
        [0.0, 0.0, 1.0, 0.0],
        [0.0, 0.0, 0.0, 1.0],
    ]
)


In [None]:
def get_gripper_pose(
    scene_obj_dir: str,
    taskdir: str,
    frameid: int = 0,
    key_gripper_pose: str = "target_transfer_pose",
):
    """Load the transferred gripper pose stored for one task directory.

    Reads `<scene_obj_dir>/<taskdir>/out/hamer/model/<frameid:06d>.npz` and
    returns the first entry of the array saved under `key_gripper_pose`.

    Inputs
    ------
    scene_obj_dir: (str) path to a scene-object directory, e.g. `.../s10_004`

    taskdir: (str) name of the task directory inside `scene_obj_dir`

    frameid: (int) frame index selecting the npz file; defaults to 0

    key_gripper_pose: (str) npz key holding the gripper pose(s);
        defaults to "target_transfer_pose"

    Output
    ------
    The gripper pose in the camera frame (presumably a 4x4 transform -- this
    function does not check the shape), or None when the npz file or the key
    is missing. A message is printed in both of those cases.
    """
    # Label used in log messages. Taking the directory's base name (instead of
    # the last 8 characters of the path) keeps the label right for short scene
    # ids: `.../data_captured_rfpv1/s27_003` used to be logged as `v1/s27_003`.
    scene_obj_name = osp.basename(osp.normpath(scene_obj_dir))

    npzfile = osp.join(
        scene_obj_dir, taskdir, "out", "hamer", "model", f"{frameid:06d}.npz"
    )
    if not os.path.exists(npzfile):
        print("[WRN]: npz not found:", scene_obj_name, taskdir)
        return None

    # NOTE(security): allow_pickle=True can execute arbitrary code when the file
    # holds pickled objects -- only load npz files from a trusted pipeline.
    # The context manager closes the underlying archive (no leaked file handle).
    with np.load(npzfile, allow_pickle=True) as data:
        if key_gripper_pose not in data:
            print("[ERROR]: grasp key not found:", scene_obj_name, taskdir)
            return None
        # The array is fully read into memory here, so it stays valid after close.
        return data[key_gripper_pose][0]


def collect_scene_obj_poses(scene_obj_dir: str):
    """Gather the gripper poses of every task directory in a scene-object directory.

    Each sub-directory of `scene_obj_dir` is treated as a task directory and
    visited in sorted name order; plain files are skipped. Task directories for
    which `get_gripper_pose` returns None are skipped.

    Inputs
    ------
    scene_obj_dir: (str) path to a scene-object directory, e.g. `.../s10_004`

    Output
    ------
    numpy array stacking the collected poses along the first axis. When no pose
    is found, an empty array of shape (0, 4, 4) is returned (rather than shape
    (0,)) so that a batched matrix product with a 4x4 transform still works.
    This assumes poses are 4x4 transforms, as expected by `construct_grasp_json`.
    """
    task_dirs = sorted(
        name
        for name in os.listdir(scene_obj_dir)
        if osp.isdir(osp.join(scene_obj_dir, name))
    )

    all_poses = []
    for tdir in task_dirs:
        curr_gripper_pose = get_gripper_pose(scene_obj_dir, tdir)
        if curr_gripper_pose is not None:
            all_poses.append(curr_gripper_pose)

    if not all_poses:
        return np.empty((0, 4, 4))
    return np.array(all_poses)


def construct_grasp_json(datadir: str, scene_id: str, objid: str):
    """ Collects all the transferred gripper grasps by iterating
    through all task dirs in the scene-object directory and creates
    a grasp json file similar to expected structure as RFP experiments.
    Saves the json in the relevant scene-object directory

    Inputs
    ------

    datadir: (str) path to processed dir containing human demo to fetch gripper transfer data

    scene_id: (str) scenereplica scene id, e.g. s10

    objid: (str) ycb object numeric key, e.g, 003 (must be a key of `YCBID_MAP`)

    Output
    ------

    Returns the number of grasps written (int) and, as a side effect, writes
    `<datadir>/<scene_id>_<objid>/fetch_gripper-<object name>.json`.
    A json with `num_grasps` 0 and empty pose lists is written when no grasp
    could be collected.

    The json contains poses for the gripper in (1) RT 4x4 matrix, and
    (2) pose 7d (trans, quat_wxyz) format. The gripper poses are saved
    wrt both (older urdf) wrist roll frame, and (newer urdf) fingertip i.e
    both these conventions define the origin of the gripper. For more
    details, see: `https://github.com/IRVLUTD/isaac_sim_grasping/commit/ee00ac4ef8dc5541d4b56c4eb516c09352f66e7f`


    Keys for saved json

    gripper : hardcoded to fetch_gripper since this is the only one used
    object_id : ycb object id, e.g. 003_cracker_box
    scene_id : scene replica scene id, e.g. s10 (s just means a scene)
    info : help message about the contents of json
    num_grasps : number of grasp poses stored
    pose : grasp pose (in camera frame) for gripper fingertip in isaac format (trans, quat_wxyz)
    other pose keys for help in debugging: [pose_wroll, pose_rt_wroll, pose_rt_ftip]
    """
    scene_obj_dir = osp.join(datadir, f"{scene_id}_{objid}")

    # note all poses are still in camera frame!
    gripper_poses_wroll_rt = collect_scene_obj_poses(scene_obj_dir)
    if len(gripper_poses_wroll_rt) == 0:
        # An empty result may come back with shape (0,), which cannot be
        # matrix-multiplied with a 4x4 transform; use an empty (0, 4, 4) batch.
        gripper_poses_wroll_rt = np.empty((0, 4, 4))
    # Right-multiplying by the fixed offset re-expresses each wrist roll pose
    # as the corresponding fingertip pose.
    gripper_poses_ftip_rt = gripper_poses_wroll_rt @ TF_wrist_roll_to_fingertip

    GRIPPER_NAME = "fetch_gripper"
    OBJECT_NAME = YCBID_MAP[objid]
    grasp_info_dict = {}
    grasp_info_dict["gripper"] = GRIPPER_NAME
    grasp_info_dict["object_id"] = OBJECT_NAME
    grasp_info_dict["scene_id"] = scene_id
    grasp_info_dict["info"] = "Transferred grasps poses from human to fetch gripper in Head Camera frame. `pose` key contains the gripper fingetip pose as (trans, quat_wxyz) isaacsim format. other formats also included in keys"

    def _to_pose7d(rt):
        # Flatten a 4x4 transform into [tx, ty, tz, qw, qx, qy, qz].
        quat, trans = rt_to_isaac_pose(rt)
        return [*trans.tolist(), *quat.tolist()]

    pose_ftip_isaac = []
    pose_ftip_rt = []
    pose_wroll_isaac = []
    pose_wroll_rt = []

    ### Populate lists for grasp poses in different frames and formats
    num_grasps = len(gripper_poses_wroll_rt)
    for curr_rt_wroll, curr_rt_ftip in zip(gripper_poses_wroll_rt, gripper_poses_ftip_rt):
        pose_wroll_rt.append(curr_rt_wroll.tolist())
        pose_ftip_rt.append(curr_rt_ftip.tolist())
        pose_wroll_isaac.append(_to_pose7d(curr_rt_wroll))
        pose_ftip_isaac.append(_to_pose7d(curr_rt_ftip))

    ### Add to json
    grasp_info_dict['num_grasps'] = num_grasps
    grasp_info_dict['pose'] = pose_ftip_isaac
    grasp_info_dict['pose_wroll'] = pose_wroll_isaac
    grasp_info_dict['pose_rt_ftip'] = pose_ftip_rt
    grasp_info_dict['pose_rt_wroll'] = pose_wroll_rt

    out_json_f = osp.join(scene_obj_dir, f"{GRIPPER_NAME}-{OBJECT_NAME}.json")
    with open(out_json_f, "w") as jf:
        json.dump(grasp_info_dict, jf)
    print("[LOG] num grasps:", num_grasps)
    return num_grasps



In [20]:
# Write one grasp json per (scene, object) pair listed in OBJID_TO_SCENES.
# A blank separator is printed after each object's scenes to group the log.
for oid, scenes in OBJID_TO_SCENES.items():
    for sid in scenes:
        print("Processing (scene) (object):", sid, oid)
        _ = construct_grasp_json(datadir, scene_id=sid, objid=oid)
    print("\n")

Processing (scene) (object): s27 003
[WRN]: npz not found: v1/s27_003 task_2_2s
[LOG] num grasps: 4
Processing (scene) (object): s48 003
[WRN]: npz not found: v1/s48_003 task_0_2s
[WRN]: npz not found: v1/s48_003 task_1_3s
[WRN]: npz not found: v1/s48_003 task_4_1s
[LOG] num grasps: 2


Processing (scene) (object): s10 004
[LOG] num grasps: 5
Processing (scene) (object): s141 004
[LOG] num grasps: 5


Processing (scene) (object): s65 005
[LOG] num grasps: 5
Processing (scene) (object): s130 005
[LOG] num grasps: 5


Processing (scene) (object): s83 006
[LOG] num grasps: 5
Processing (scene) (object): s130 006
[LOG] num grasps: 5


Processing (scene) (object): s56 007
[LOG] num grasps: 5
Processing (scene) (object): s122 007
[LOG] num grasps: 5


Processing (scene) (object): s38 008
[LOG] num grasps: 5
Processing (scene) (object): s56 008
[LOG] num grasps: 5


Processing (scene) (object): s122 009
[LOG] num grasps: 5
Processing (scene) (object): s39 009
[LOG] num grasps: 5


Processing 

In [None]:
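# NOTE: disabled duplicate of the wrist roll -> fingertip transform that is
# already defined together with the other constants; kept for reference only.
# # i.e. the fingertip link is delta meters ahead of the wrist roll link when considering the old (original) fetch fingers
# delta_fingertip_old = 0.19645
# TF_wrist_roll_to_fingertip = np.eye(4)
# TF_wrist_roll_to_fingertip[0, 3] = delta_fingertip_old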