In [1]:
!pwd

/home/giakhang/dev/RDPN6D


In [3]:
import trimesh
import numpy as np
from scipy.spatial.distance import pdist
import os.path as osp
import glob
from pathlib import Path
import json
import cv2
import matplotlib.pyplot as plt
import shutil
import os
import trimesh
import imageio
import random
from scipy.spatial.transform import Rotation as R

# Prepare model info

In [None]:
def get_model_info(file_path):
    mesh = trimesh.load(file_path)

    # Get bounding box extents
    min_bounds = mesh.bounds[0]
    max_bounds = mesh.bounds[1]
    
    size_x, size_y, size_z = max_bounds - min_bounds
    min_x, min_y, min_z = min_bounds

    # Compute diameter as the max pairwise Euclidean distance between vertices
    vertices = mesh.vertices
    diameter = np.max(pdist(vertices))  # Computes all pairwise distances

    return {
        "diameter": float(round(diameter, 6)),
        "min_x": float(round(min_x, 6)),
        "min_y": float(round(min_y, 6)),
        "min_z": float(round(min_z, 6)),
        "size_x": float(round(size_x, 6)),
        "size_y": float(round(size_y, 6)),
        "size_z": float(round(size_z, 6))
    }

In [None]:
model_files = ["/home/giakhang/dev/RDPN6D/datasets/lumi_piano_dataset/models/obj_000001.ply"]

des_dir = Path("/home/giakhang/dev/RDPN6D/datasets/lumi_piano_dataset/models")

models_info = dict()
for i, f in enumerate(model_files):
    # unit: meters (m)
    info = get_model_info(f)
    models_info[str(i+1)] = info

#file_name = "models_info.json"
#with open(des_dir / file_name, "w") as f:
    #json.dump(models_info, f)

In [6]:
models_info

{'1': {'diameter': 102.903463,
  'min_x': -26.315001,
  'min_y': -38.921001,
  'min_z': -25.5655,
  'size_x': 52.630001,
  'size_y': 77.842003,
  'size_z': 51.131001,
  'symmetries_discrete': []}}

# Prepare RGBD, mask

In [2]:
num = 7
track_num = f"track_{num:02d}"

ism_mask_dir = Path(f"/media/giakhang/OS/dataset/piano_6d/{track_num}/masks")
ism_mask_files = os.listdir(ism_mask_dir)
ism_mask_files = [f for f in ism_mask_files if f.endswith(".png")]
ism_mask_files = sorted(ism_mask_files)
intr_path = Path(f"/media/giakhang/OS/dataset/piano_6d/{track_num}/cam_K.txt")
cam_info_path = Path(f"/media/giakhang/OS/dataset/piano_6d/{track_num}/camera.json")

with open(cam_info_path, "r") as f:
    cam_info = json.load(f)

dest_dir = Path(f"/home/giakhang/dev/RDPN6D/datasets/lumi_piano_dataset/data") / track_num

os.makedirs(dest_dir, exist_ok=True)

dest_rgb = dest_dir / "rgb"
dest_depth = dest_dir / "depth"
dest_mask = dest_dir / "mask"
dest_mask_visib = dest_dir / "mask_visib"  
debug_dir = dest_dir /  "2d_boxes_and_projected_boxes_debug"

scene_camera_file = dest_dir / "scene_camera.json"
scene_gt_info_file = dest_dir / "scene_gt_info.json"
scene_gt_file = dest_dir / "scene_gt.json"

if osp.exists(dest_rgb):
    shutil.rmtree(dest_rgb)    
if osp.exists(dest_depth):
    shutil.rmtree(dest_depth)
if osp.exists(dest_mask):
    shutil.rmtree(dest_mask)
if osp.exists(dest_mask_visib):
    shutil.rmtree(dest_mask_visib)
if osp.exists(debug_dir):
    shutil.rmtree(debug_dir)
os.makedirs(dest_rgb, exist_ok=True)
os.makedirs(dest_depth, exist_ok=True)
os.makedirs(dest_mask, exist_ok=True)
os.makedirs(dest_mask_visib, exist_ok=True)
os.makedirs(debug_dir, exist_ok=True)

pose_result_dir = Path(f"/media/giakhang/OS/dataset/piano_6d/{track_num}/foundation_pose_results/pose_results")
depth_src_dir = Path(f"/media/giakhang/OS/dataset/piano_6d/{track_num}/depth")
rgb_src_dir = Path(f"/media/giakhang/OS/dataset/piano_6d/{track_num}/rgb")

In [3]:
def calc_projected_2d_box(P, rot_mat, trans_mat, intr):
    """
    vertices_3d: 8x3 np array
    rot_mat: 3x3 np
    trans_mat: 3x1 np
    intr: 3x3 np
    """

    P_cam = (rot_mat @ P.T).T + trans_mat.T
    P_image = (intr @ P_cam.T).T

    x_min = min(P_image[:, 0])
    x_max = max(P_image[:, 0])
    y_min = min(P_image[:, 1])
    y_max = max(P_image[:, 1])

    return x_min, y_min, x_max, y_max

def calculate_2d_projections(coordinates_3d, intrinsics):
    """
    Input: 
        coordinates: [3, N]
        intrinsics: [3, 3]
    Return 
        projected_coordinates: [N, 2]
    """
    projected_coordinates = intrinsics @ coordinates_3d
    projected_coordinates = projected_coordinates[:2, :] / projected_coordinates[2, :]
    projected_coordinates = projected_coordinates.transpose()
    projected_coordinates = np.array(projected_coordinates, dtype=np.int32)

    return projected_coordinates

def get_proj_2d_box(model_points, pred_rot, pred_tran, intrinsic):
    choose = np.random.choice(np.arange(len(model_points)), 512)
    pts_3d = model_points[choose].T
    transformed_pts_3d = pred_rot@pts_3d + pred_tran[:,np.newaxis]
    projected_pts = calculate_2d_projections(transformed_pts_3d, intrinsic)
    x_min, y_min = np.min(projected_pts, axis=0)
    x_max, y_max = np.max(projected_pts, axis=0)
    return x_min, y_min, x_max, y_max

def get_vertices_3d(mesh):
    max_corner = mesh.bounds[1]  # (xmax, ymax, zmax)
    min_corner = mesh.bounds[0]  # (xmin, ymin, zmin)

    # Compute width, height, and depth
    width  = max_corner[0] - min_corner[0]  # X-axis
    height = max_corner[1] - min_corner[1]  # Y-axis
    depth  = max_corner[2] - min_corner[2]  # Z-axis

    p1 = np.array([-width/2, -height/2, -depth/2])
    p2 = np.array([width/2, -height/2, -depth/2])
    p3 = np.array([width/2, height/2, -depth/2])
    p4 = np.array([-width/2, height/2, -depth/2])
    p5 = np.array([-width/2, -height/2, depth/2])
    p6 = np.array([width/2, -height/2, depth/2])
    p7 = np.array([width/2, height/2, depth/2])
    p8 = np.array([-width/2, height/2, depth/2])

    P = np.array([p1, p2, p3, p4, p5, p6, p7, p8])

    return P

def get_annotated_bbox(mask):
    bbox = []
    for i in np.unique(mask):
        if i == 0:
            continue
        mask_i = mask.copy()
        mask_i[mask_i != i] = 0
        y, x = np.where(mask_i == i)
        bbox.append((min(x), min(y), max(x), max(y)))
    return bbox

def calc_iou(box1, box2):
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    intersection = max(0, x2 - x1) * max(0, y2 - y1)
    union = (box1[2] - box1[0]) * (box1[3] - box1[1]) + (box2[2] - box2[0]) * (box2[3] - box2[1]) - intersection

    return intersection / union

In [4]:
mesh_file = "/media/giakhang/OS/dataset/piano_6d/track_04/mesh/LUMI.obj"
mesh = trimesh.load(mesh_file)
P = mesh.sample(1024).astype(np.float32)

#P = get_vertices_3d(mesh)

intr = np.loadtxt(str(intr_path)).reshape(3,3)

In [5]:
remove_img_name = [
    "00033.png",
    "00034.png",
    "00035.png",
    "00036.png",
    "00037.png",
    "00038.png",
    "00039.png",
    "00040.png",
    "00041.png",
    "00042.png",
    "00072.png",
    "00118.png",
    "00119.png",
    "00120.png",
    "00122.png",
    "00136.png",
    "00137.png",
    "00140.png",
    "00149.png",
    "00156.png",
    "00157.png",
    "00158.png",
    "00159.png",
    "00160.png",
    "00161.png",
    "00162.png",
    "00163.png",
    "00164.png",
    "00165.png",
    "00166.png",
    "00167.png",
    "00168.png",
    "00169.png",
    "00170.png",
    "00171.png",
    "00174.png",
    "00177.png",
    "00181.png",
    "00186.png",
    "00187.png",
    "00189.png",
    "00198.png",
    "00199.png",
    "00227.png",
    "00228.png",
    "00229.png",
    "00249.png",
    "00250.png",
    "00251.png",
    "00252.png",
    "00253.png",
    "00254.png",
    "00255.png",
    "00256.png",
    "00257.png",
    "00258.png",
    "00259.png",
    "00260.png",
    "00261.png",
    "00262.png",
    "00263.png",
    "00264.png",
    "00265.png",
    "00266.png",
    "00267.png",
    "00268.png",
    "00269.png",
    "00270.png",
    "00271.png",
    "00272.png",
    "00273.png",
    "00274.png",
    "00275.png",
    "00276.png",
    "00277.png",
    "00278.png",
    "00279.png",
    "00280.png",
    "00281.png",
    "00282.png",
    "00283.png",
    "00284.png",
    "00285.png",
    "00286.png",
    "00287.png",
    "00288.png",
    "00289.png",
    "00290.png",
    "00291.png",
    "00292.png",
    "00293.png",
    "00294.png",
    "00295.png",
    "00296.png",
    "00297.png",
    "00298.png",
    "00299.png",
    "00300.png",
    "00301.png",
    "00302.png",
    "00303.png",
    "00304.png",
    "00305.png",
    "00306.png",
    "00364.png",
    "00376.png",
    "00377.png",
    "00379.png",
    "00380.png",
    "00381.png",
    "00382.png",
    "00383.png",
    "00384.png",
    "00385.png",
    "00386.png",
    "00387.png",
    "00388.png",
    "00389.png",
    "00390.png",
    "00391.png",
    "00392.png",
    "00393.png",
    "00394.png",
    "00395.png",
    "00396.png",
    "00397.png",
    "00398.png",
    "00399.png",
    "00400.png",
    "00401.png",
    "00402.png",
    "00403.png",
    "00404.png",
    "00405.png",
    "00406.png",
    "00407.png",
    "00408.png",
    "00409.png",
    "00410.png",
    "00411.png",
    "00412.png",
    "00413.png",
    "00414.png",
    "00415.png",
    "00416.png",
    "00417.png",
    "00418.png",
    "00419.png",
    "00420.png",
    "00421.png",
    "00422.png",
    "00423.png",
    "00424.png",
    "00425.png",
    "00426.png",
    "00427.png",
    "00428.png",
    "00429.png",
    "00430.png",
    "00431.png",
    "00432.png",
    "00433.png",
    "00434.png",
    "00435.png",
    "00436.png",
    "00437.png",
    "00438.png",
    "00439.png",
    "00440.png",
    "00441.png",
    "00442.png",
    "00443.png",
    "00444.png"
]

In [6]:
choose_img = []

scene_gt_info = {}
scene_camera = {}
scene_gt = {}

for f in ism_mask_files:
    img_id = f[:-4]
    if f in remove_img_name:
        continue
    pose_path = pose_result_dir / f.replace(".png", ".gt.json")
    ism_mask_file = ism_mask_dir / f

    if not pose_path.exists():
        continue

    with open(pose_path, 'r') as file:
        list_pose = json.load(file)
    
    projected_2d_boxes = []
    for pose in list_pose:
        rot_mat = np.array(pose["cam_R_m2c"]).reshape(3, 3)
        trans_mat = np.array(pose["cam_t_m2c"]).reshape(3, 1)
        trans_mat_m = trans_mat / 1e3
        #P = get_vertices_3d(mesh)
        #projected_2d_boxes.append(calc_projected_2d_box(P, rot_mat, trans_mat_m, intr))
        proj_2d_box = get_proj_2d_box(P * 1000.0, rot_mat, trans_mat.squeeze(), intr)
        projected_2d_boxes.append(proj_2d_box)

    mask = cv2.imread(str(ism_mask_file), -1)
    if np.unique(mask).shape[0] != 3 and len(projected_2d_boxes) == 2:
        continue

    choose_img.append(img_id)

    annotated_bboxes = get_annotated_bbox(mask)

    if len(projected_2d_boxes) == len(annotated_bboxes) == 2:
        ious = []
        for bbox_idx, bbox in enumerate(annotated_bboxes):
            proj_box = projected_2d_boxes[0]
            ious.append(calc_iou(proj_box, bbox))
    
        match_idxs = [np.argmax(ious), 1 - np.argmax(ious)]
    else:
        match_idxs = [0]

    box_color = [(0, 0, 255), (255, 0, 0)]
    img = cv2.imread(str(rgb_src_dir / f))
    for i , box in enumerate(annotated_bboxes):
        x_min, y_min, x_max, y_max = box
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), box_color[i][::-1], 2)

    for i, box in enumerate(projected_2d_boxes):
        x_min, y_min, x_max, y_max = box
        cv2.rectangle(img, (int(x_min), int(y_min)), (int(x_max), int(y_max)), box_color[match_idxs[i]][::-1], 2)

    imageio.imwrite(str(debug_dir / f), img[..., ::-1])

    if len(match_idxs) == 2:
        list_pose[0], list_pose[1] = list_pose[match_idxs[0]], list_pose[match_idxs[1]] 

    img_gt_info = []
    img_gt = []

    for i in np.unique(mask):
        if i == 0 or i > len(list_pose):
            continue
        mask_i = mask.copy()
        mask_i[mask_i != i] = 0
        mask_i[mask_i == i] = 1
        mask_i = mask_i.astype(np.uint8)
        y, x = np.where(mask_i == 1)
        x1, y1, x2, y2 = min(x), min(y), max(x), max(y)
        w = x2 - x1
        h = y2 - y1

        px_count_visib = int(np.sum(mask_i).item())
        px_count_valid = int(w * h)
        visib_fract = px_count_visib / px_count_valid

        img_gt_info.append({
            "bbox_obj": [int(x1), int(y1), int(w), int(h)],
            "bbox_visib": [int(x1), int(y1), int(w), int(h)],
            "px_count_all": px_count_valid, 
            "px_count_valid": px_count_valid, 
            "px_count_visib": px_count_visib, 
            "visib_fract": visib_fract
        })

        img_gt.append({
            "cam_R_m2c": list_pose[i-1]["cam_R_m2c"],
            "cam_t_m2c": list_pose[i-1]["cam_t_m2c"],
            "obj_id": int(list_pose[i-1]["obj_id"]) + 1
        })
    
    scene_camera[str(int(img_id))] = cam_info
    scene_gt_info[str(int(img_id))] = img_gt_info
    scene_gt[str(int(img_id))] = img_gt

    shutil.copy(str(rgb_src_dir / f), str(dest_rgb / f))
    shutil.copy(str(depth_src_dir / f), str(dest_depth / f))
    for i in np.unique(mask):
        if i == 0:
            continue
        mask_i = mask.copy()
        mask_i[mask_i != i] = 0
        mask_i[mask_i == i] = 255
        mask_i = mask_i.astype(np.uint8)
        cv2.imwrite(str(dest_mask_visib / f"{img_id}_{i-1:05d}.png"), mask_i)
        cv2.imwrite(str(dest_mask / f"{img_id}_{i-1:05d}.png"), mask_i)


In [7]:
with open(scene_camera_file, "w") as f:
    json.dump(scene_camera, f)

In [8]:
with open(scene_gt_info_file, "w") as f:
    json.dump(scene_gt_info, f)

In [9]:
with open(scene_gt_file, "w") as f:
    json.dump(scene_gt, f)

In [10]:
rgb_files = choose_img.copy()

In [11]:
def split_list(numbers, split_ratio=0.5):
    size = int(len(numbers) * split_ratio)  # Size of the first set
    first_set = random.sample(numbers, size)  # Randomly select elements
    second_set = [num for num in numbers if num not in first_set]  # Remaining elements
    return first_set, second_set

test_set, train_set = split_list(rgb_files, 0.1)

In [12]:
train_set = sorted(train_set)
test_set = sorted(test_set)

In [13]:
img_set_dir = f"/home/giakhang/dev/RDPN6D/datasets/lumi_piano_dataset/data/{track_num}/image_set"

os.makedirs(img_set_dir, exist_ok=True)

lumi_piano_all_file = img_set_dir + "/lumi_piano_all.txt"
lumi_piano_train_file = img_set_dir + "/lumi_piano_train.txt"
lumi_piano_test_file = img_set_dir + "/lumi_piano_test.txt"

In [14]:
with open(lumi_piano_all_file, "w") as file:
    file.writelines([f"{value}\n" for value in rgb_files])

In [15]:
with open(lumi_piano_train_file, "w") as file:
    file.writelines([f"{value}\n" for value in train_set])

In [16]:
with open(lumi_piano_test_file, "w") as file:
    file.writelines([f"{value}\n" for value in test_set])

In [17]:
!python3 tools/lumi_piano/lumi_piano_gen_xyz_crop.py --track {num} --height 400 --width 640 

using egl
100%|█████████████████████████████████████████| 272/272 [00:25<00:00, 10.79it/s]
split train track 7 total time:  25.212674264003


In [29]:
import trimesh

mesh = trimesh.load_mesh("/media/giakhang/OS/dataset/piano_6d/track_04/mesh/LUMI.obj")

if hasattr(mesh.visual, 'uv') and hasattr(mesh.visual, 'material') and mesh.visual.material.image:
    mesh.visual = mesh.visual.to_color()

mesh.export("/media/giakhang/OS/dataset/piano_6d/piano.ply", file_type='ply', encoding='ascii')


b'ply\nformat ascii 1.0\ncomment https://github.com/mikedh/trimesh\nelement vertex 25642\nproperty float x\nproperty float y\nproperty float z\nproperty uchar red\nproperty uchar green\nproperty uchar blue\nproperty uchar alpha\nelement face 25758\nproperty list uchar int vertex_indices\nend_header\n0.12180000 0.01280000 0.05810000 125 131 137 255\n0.12180000 0.01280000 0.05810000 125 131 137 255\n0.12180000 0.01290000 0.05810000 125 131 137 255\n0.12180000 0.01290000 0.05810000 125 131 137 255\n0.12180000 0.01190000 0.06330000 28 32 35 255\n0.12180000 0.01190000 0.06330000 28 32 35 255\n0.12180000 0.01210000 0.06350000 7 7 8 255\n0.12180000 0.01210000 0.06350000 7 7 8 255\n0.12180000 0.01290000 0.06350000 7 7 8 255\n0.12180000 0.01290000 0.06350000 7 7 8 255\n0.12180000 0.01280000 0.05610000 101 111 118 255\n0.12180000 0.01280000 0.05610000 101 111 118 255\n0.12180000 0.01290000 0.05610000 101 111 118 255\n0.12180000 0.01290000 0.05610000 101 111 118 255\n0.12180000 0.01190000 0.05090

In [5]:
!ln -s /media/giakhang/OS/dataset/piano_6d/piano.ply ./datasets/lumi_piano_dataset/models/obj_000001.ply

In [3]:
!ln -s /home/giakhang/dev/RDPN6D/datasets/lumi_piano_dataset/models/obj_000001.ply ./datasets/lumi_piano_dataset/models_eval