In [1]:
# Show the working directory: the relative `./datasets/...` path used by the `ln -s` cell later in this notebook resolves against it.
!pwd

/home/giakhang/dev/RDPN6D


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import random
import glob
import os
import shutil 
import os.path as osp
from pathlib import Path
import imageio
from tqdm import tqdm
import json
from scipy.spatial.transform import Rotation
import trimesh
from scipy.spatial.distance import pdist

# Convert the raw dataset to the RDPN6D layout

In [2]:
def load_masks(mask_path):
    """
    Rebuild a dense mask array from a sparse index array stored on disk.

    Args:
        mask_path: Path of a ``.npy`` file. Row 0 of the stored array is used as the
            shape of the dense output; every following row holds the three indices
            of one element that is set to 1.

    Returns:
        np.array: ``uint8`` array of the stored shape, 1 at the listed indices and 0 elsewhere.
    """
    sparse = np.load(mask_path)
    dense_shape, coords = sparse[0], sparse[1:]

    dense = np.zeros(shape=dense_shape, dtype=np.uint8)
    dense[coords[:, 0], coords[:, 1], coords[:, 2]] = 1
    return dense

def draw_seg(image, annotations):
    """
    Overlay every annotation's bounding box and polygon outline on an image.

    Args:
        image: Image handed to the OpenCV drawing calls; it is drawn on in place.
        annotations: Iterable of dicts, each with a "bbox" entry that unpacks to
            (x_min, y_min, x_max, y_max) and a "polygon" entry passed to cv2.polylines.

    Returns:
        The same `image` object that was passed in, after drawing.
    """
    box_color = (0, 255, 0)
    outline_color = (0, 0, 255)
    line_width = 2

    for entry in annotations:
        left, top, right, bottom = entry["bbox"]
        outline = entry["polygon"]

        # Box first, then the closed polygon outline on top of it.
        cv2.rectangle(image, (left, top), (right, bottom), box_color, line_width)
        cv2.polylines(image, [outline], isClosed=True, color=outline_color, thickness=line_width)

    return image

def mask_to_rgb(instance_masks):
    """
    Render a stack of instance masks as one colour image, one random colour per instance.

    Args:
        instance_masks (np.array): Shape (num_instances, H, W); values > 0 mark the instance.

    Returns:
        np.array: ``uint8`` image of shape (H, W, 3). Pixels covered by no instance stay 0;
        where instances overlap, the instance with the higher index wins.
    """
    instance_count, height, width = instance_masks.shape
    canvas = np.zeros((height, width, 3), dtype=np.uint8)

    # Draw all colours up front, three channel values per instance.
    palette = []
    for _ in range(instance_count):
        first = random.randint(0, 255)
        second = random.randint(0, 255)
        third = random.randint(0, 255)
        palette.append((first, second, third))

    # Paint in index order so later instances overwrite earlier ones.
    for layer, colour in zip(instance_masks, palette):
        canvas[layer > 0] = colour

    return canvas

In [3]:
# Source side: one raw scene folder, selected by NUM.
RAW_DATA_DIR = Path("/media/giakhang/OS/dataset/piano_6d/LUMI_BDDS_dataset")
NUM = 6
SCENE_DIR = RAW_DATA_DIR / f"synthetic_data{NUM}"
SRC_DEPTH_DIR = SCENE_DIR / "depths"

# Destination root that receives the converted scenes.
DES_DIR = Path("/media/giakhang/OS/dataset/piano_6d/synthetic_data")

# exist_ok=True replaces the separate exists() test, which could race with
# another process creating the folder between the check and the call.
os.makedirs(DES_DIR, exist_ok=True)

DES_DEPTH_DIR = DES_DIR / f"synthetic_data{NUM}" / "depth"
os.makedirs(DES_DEPTH_DIR, exist_ok=True)

In [4]:
# Halve every raw depth map and store it under DES_DEPTH_DIR with the same file name.
# Sorting makes the processing order deterministic across runs.
depth_paths = sorted(glob.glob(str(SRC_DEPTH_DIR / "*.png")))

# Loop-invariant target size, in cv2's (width, height) order.
new_size = (1280 // 2, 720 // 2)

for f in tqdm(depth_paths):
    depth = cv2.imread(str(f), -1)
    if depth is None:
        # cv2.imread signals an unreadable file by returning None instead of raising.
        raise OSError(f"cv2.imread could not read depth image: {f}")
    depth = depth.astype(np.uint16)

    # Only the first channel of the stored image is kept.
    depth = depth[..., 0]
    assert depth.shape == (720, 1280)

    # Nearest-neighbour keeps original depth samples instead of blending neighbours.
    depth = cv2.resize(depth, new_size, interpolation=cv2.INTER_NEAREST)
    assert depth.shape == (360, 640)

    # basename() instead of splitting on "/" so the name is extracted per platform rules.
    imageio.imwrite(DES_DEPTH_DIR / osp.basename(f), depth)

100%|██████████| 1362/1362 [03:15<00:00,  6.97it/s]


In [5]:
# Raw colour frames of the scene.
SRC_RGB_DIR = SCENE_DIR / "images"

# Output folder for the converted colour frames; created if missing.
DES_RGB_DIR = DES_DIR / f"synthetic_data{NUM}" / "rgb"
os.makedirs(name=DES_RGB_DIR, exist_ok=True)

In [6]:
# Halve every raw colour frame and store it under DES_RGB_DIR with the same file name.
# Sorting makes the processing order deterministic across runs.
rgb_paths = sorted(glob.glob(str(SRC_RGB_DIR / "*.png")))

# Loop-invariant target size, in cv2's (width, height) order.
new_size = (1280 // 2, 720 // 2)

for f in tqdm(rgb_paths):
    img = cv2.imread(str(f))
    if img is None:
        # cv2.imread signals an unreadable file by returning None instead of raising.
        raise OSError(f"cv2.imread could not read image: {f}")
    assert img.shape[:2] == (720, 1280)

    img = cv2.resize(img, new_size)
    assert img.shape[:2] == (360, 640)

    # cv2.imread returns channels in BGR order while imageio writes arrays as RGB.
    # Convert before saving, otherwise red and blue are swapped in the output files.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    imageio.imwrite(DES_RGB_DIR / osp.basename(f), img_rgb)

100%|██████████| 1362/1362 [10:33<00:00,  2.15it/s]


In [7]:
# Raw mask arrays of the scene.
SRC_MASK_DIR = SCENE_DIR / "masks"

# Two output folders under the converted scene: "mask" and "mask_visib".
DES_MASK_DIR = DES_DIR / f"synthetic_data{NUM}" / "mask"
DES_MASK_VISIB_DIR = DES_DIR / f"synthetic_data{NUM}" / "mask_visib"

for out_dir in (DES_MASK_DIR, DES_MASK_VISIB_DIR):
    os.makedirs(out_dir, exist_ok=True)

In [8]:
# Export one 0/255 PNG per (image, instance) pair into both mask folders.
# The list of image names is reused by the later scene_* cells.
rgb_filenames = sorted(f for f in os.listdir(SRC_RGB_DIR) if f.endswith(".png"))

# Loop-invariant target size, in cv2's (width, height) order.
new_size = (1280 // 2, 720 // 2)

for fn in tqdm(rgb_filenames):
    stem = fn[:-4]
    int_img_idx = int(stem)

    # Build the mask name from the stem; replacing the substring "png" could
    # also rewrite a match inside the stem itself.
    raw_mask_f = SRC_MASK_DIR / f"{stem}.npy"
    mask = load_masks(raw_mask_f)

    for ins_idx in range(mask.shape[0]):
        # astype() returns a copy, so the loaded mask stack is left untouched.
        ins = mask[ins_idx, ...].astype(np.uint8)
        ins[ins == 1] = 255

        assert ins.shape == (720, 1280)
        ins = cv2.resize(ins, new_size, interpolation=cv2.INTER_NEAREST)
        # Check the resized mask itself (previously this tested `img`, a
        # variable left over from the RGB conversion cell).
        assert ins.shape == (360, 640)
        if np.sum(ins) != 0:
            assert np.unique(ins).tolist() == [0, 255]

        # The same array is written to both folders.
        out_name = f"{int_img_idx:05d}_{ins_idx:05d}.png"
        for out_dir in (DES_MASK_DIR, DES_MASK_VISIB_DIR):
            # cv2.imwrite reports failure through its return value only.
            if not cv2.imwrite(str(out_dir / out_name), ins):
                raise OSError(f"cv2.imwrite failed for {out_dir / out_name}")

  0%|          | 0/1362 [00:00<?, ?it/s]

100%|██████████| 1362/1362 [00:47<00:00, 28.95it/s]


In [9]:
# Write scene_camera.json: one intrinsics matrix and depth scale per image.
scene_camera_file = DES_DIR / f"synthetic_data{NUM}" / "scene_camera.json"

img_idxs = sorted(int(fn[:-4]) for fn in rgb_filenames)

scene_camera = {}

for img_idx in tqdm(img_idxs):
    meta_path = str(SCENE_DIR / "metas" / f"{img_idx:05d}.json")
    with open(meta_path, "r") as f:
        annot = json.load(f)

    intrinsics = annot["camera_info"]["intrinsics"]

    # Focal lengths and principal point are halved, matching the half-size
    # images and depth maps this notebook writes.
    fx = intrinsics["fx"] / 2
    fy = intrinsics["fy"] / 2
    cx = intrinsics["cx"] / 2
    cy = intrinsics["cy"] / 2
    depth_scale = intrinsics["depth_scale"]

    intr = np.array(
        [
            [fx, 0, cx],
            [0, fy, cy],
            [0, 0, 1],
        ],
        dtype=np.float32,
    )

    # Keys are the image index as a string; the matrix is stored flattened row by row.
    scene_camera[str(img_idx)] = {
        "cam_K": intr.flatten().tolist(),
        "depth_scale": depth_scale
    }

with open(scene_camera_file, "w") as f:
    json.dump(scene_camera, f)
    

100%|██████████| 1362/1362 [00:00<00:00, 2155.27it/s]


In [10]:
# Write scene_gt_info.json: per image, one bounding-box / pixel-count record per instance mask.
scene_gt_info_file = DES_DIR / f"synthetic_data{NUM}" / "scene_gt_info.json"

img_idxs = sorted([int(fn[:-4]) for fn in rgb_filenames])

scene_gt_info = dict()

# Loop-invariant target size, in cv2's (width, height) order.
new_size = (1280 // 2, 720 // 2)

for img_idx in tqdm(img_idxs):
    mask_file = SCENE_DIR / "masks" / f"{img_idx:05d}.npy"
    mask = load_masks(mask_file)

    gt = []
    for ins_idx in range(mask.shape[0]):
        m = mask[ins_idx, ...].copy()

        assert m.shape == (720, 1280)
        m = cv2.resize(m, new_size, interpolation=cv2.INTER_NEAREST)
        # Check the resized mask itself (previously this tested `img`, a
        # variable left over from the RGB conversion cell).
        assert m.shape == (360, 640)

        # Counted once; the mask is 0/1, so the sum is the number of set pixels.
        px_count = int(np.sum(m).item())

        if px_count == 0:
            # Placeholder record for an instance with no pixels after resizing.
            gt.append({
                "bbox_obj": [-1, -1, -1, -1],
                "bbox_visib": [-1, -1, -1, -1],
                "px_count_all": 0.0, 
                "px_count_valid": 0.0, 
                "px_count_visib": 0.0, 
                "visib_fract": 0.0
            })
            continue

        assert np.unique(m).tolist() == [0, 1], np.unique(m).tolist()
        y, x = np.where(m)
        x1, y1, x2, y2 = int(x.min()), int(y.min()), int(x.max()), int(y.max())
        w = x2 - x1
        h = y2 - y1

        # The single mask is used for all three counts, so they are all equal
        # and the visible fraction is always 1.0 for a non-empty instance.
        px_count_all = px_count
        px_count_valid = px_count
        px_count_visib = px_count
        visib_fract = px_count_visib / px_count_valid

        # Boxes are stored as [x, y, width, height] with width = x_max - x_min.
        gt.append({
            "bbox_obj": [x1, y1, w, h],
            "bbox_visib": [x1, y1, w, h],
            "px_count_all": px_count_all, 
            "px_count_valid": px_count_valid, 
            "px_count_visib": px_count_visib, 
            "visib_fract": visib_fract
        })

    scene_gt_info[str(img_idx)] = gt

with open(scene_gt_info_file, "w") as f:
    json.dump(scene_gt_info, f)

100%|██████████| 1362/1362 [01:11<00:00, 19.07it/s]


In [11]:
def format_rotation(rot_mat):
    """
    Right-multiply a rotation matrix by a fixed quarter turn about the x axis.

    Args:
        rot_mat (np.array): Matrix to convert (3x3 in this notebook).

    Returns:
        np.array: ``rot_mat`` multiplied on the right by the rotation of pi/2 about x.
    """
    quarter_turn_x = Rotation.from_euler('xyz', [np.pi / 2, 0, 0]).as_matrix()
    converted = np.dot(rot_mat, quarter_turn_x)
    return converted

In [12]:
# Write scene_gt.json: per image, one pose record per entry of the raw "object_infos" list.
scene_gt_file = DES_DIR / f"synthetic_data{NUM}" / "scene_gt.json"

img_idxs = sorted(int(fn[:-4]) for fn in rgb_filenames)

scene_gt = {}

for img_idx in tqdm(img_idxs):
    meta_path = str(SCENE_DIR / "metas" / f"{img_idx:05d}.json")
    with open(meta_path, "r") as f:
        annot = json.load(f)

    poses = []
    for obj_info in annot["object_infos"]:
        rotation = format_rotation(np.array(obj_info["cam_R_m2c"], dtype=np.float32))
        # Scaled by 1000 — presumably metres to millimetres; confirm against the raw metadata units.
        translation = np.array(obj_info["cam_t_m2c"], dtype=np.float32) * 1000

        # Every instance is written with object id 1.
        poses.append({
            "cam_R_m2c": rotation.flatten().tolist(),
            "cam_t_m2c": translation.tolist(),
            "obj_id": 1
        })

    scene_gt[str(img_idx)] = poses

with open(scene_gt_file, "w") as f:
    json.dump(scene_gt, f)

100%|██████████| 1362/1362 [00:00<00:00, 2352.25it/s]


# Additional files RDPN6D requires: dataset symlink, image-set lists, and model info

In [13]:
# Link the converted scene into the repository's dataset folder; the destination path is relative to the working directory.
# NOTE(review): plain `ln -s` reports an error when the link already exists, so re-running this cell is not idempotent.
!ln -s /media/giakhang/OS/dataset/piano_6d/synthetic_data/synthetic_data6 ./datasets/syn_lumi_piano_dataset/data

In [23]:
# Index of the `synthetic_data{num}` scene whose image lists are generated below.
num = 6

# The scene as reached through the repository's dataset folder.
src_dir = Path("/home/giakhang/dev/RDPN6D/datasets/syn_lumi_piano_dataset/data") / f"synthetic_data{num}"
rgb_dir = src_dir.joinpath("rgb")

In [24]:
# Sorted names of the PNG frames in rgb_dir, with the ".png" suffix removed.
rgb_files = sorted(
    name[:-4]
    for name in os.listdir(rgb_dir)
    if name.endswith(".png")
)

In [25]:
# Folder that holds the image-set list files of the converted scene; created if missing.
image_set_dir = (
    Path("/media/giakhang/OS/dataset/piano_6d/synthetic_data")
    / f"synthetic_data{num}"
    / "image_set"
)
os.makedirs(image_set_dir, exist_ok=True)

# One list with all frames and one with the training frames.
syn_lumi_piano_all_file = image_set_dir.joinpath("syn_lumi_piano_all.txt")
syn_lumi_piano_train_file = image_set_dir.joinpath("syn_lumi_piano_train.txt")

In [26]:
# "all" list: one frame name per line.
with open(syn_lumi_piano_all_file, "w") as file:
    file.write("".join(f"{value}\n" for value in rgb_files))

In [27]:
# "train" list: written from the same frame names as the "all" list, one per line.
with open(syn_lumi_piano_train_file, "w") as file:
    file.write("".join(f"{value}\n" for value in rgb_files))

In [2]:
def get_model_info(file_path):
    """
    Compute the bounding-box and diameter entries describing a mesh file.

    Args:
        file_path: Path of a mesh file readable by ``trimesh.load``.

    Returns:
        dict: ``diameter`` (largest distance between any two mesh vertices),
        ``min_x``/``min_y``/``min_z`` (lower corner of the mesh bounds) and
        ``size_x``/``size_y``/``size_z`` (extent of the bounds). Every value is a
        Python float rounded to 6 decimals, in the units of the mesh file.
    """
    # Function-scope import so the cell stays runnable without touching the import cell.
    from scipy.spatial import ConvexHull, QhullError

    mesh = trimesh.load(file_path)

    # Axis-aligned bounds: row 0 is the lower corner, row 1 the upper corner.
    min_bounds, max_bounds = mesh.bounds[0], mesh.bounds[1]
    size_x, size_y, size_z = max_bounds - min_bounds
    min_x, min_y, min_z = min_bounds

    # The two farthest-apart points of a point set always lie on its convex hull,
    # so pairwise distances are only needed between hull vertices. Running pdist
    # on every vertex needs n*(n-1)/2 distances, which exhausts memory on dense meshes.
    vertices = np.asarray(mesh.vertices)
    try:
        candidates = vertices[ConvexHull(vertices).vertices]
    except (QhullError, ValueError):
        # Degenerate input (e.g. flat or too few points): fall back to all vertices.
        candidates = vertices
    diameter = np.max(pdist(candidates))

    return {
        "diameter": float(round(diameter, 6)),
        "min_x": float(round(min_x, 6)),
        "min_y": float(round(min_y, 6)),
        "min_z": float(round(min_z, 6)),
        "size_x": float(round(size_x, 6)),
        "size_y": float(round(size_y, 6)),
        "size_z": float(round(size_z, 6)),
    }

In [5]:
# Print the model-info record for the piano model.
# NOTE(review): values are in the mesh file's own units, while the scene_gt cell multiplies translations by 1000 — confirm the two use consistent units.
print(get_model_info("/home/giakhang/dev/RDPN6D/datasets/lumi_piano_dataset/models/obj_000001.ply"))

{'diameter': 0.314086, 'min_x': -0.1414, 'min_y': -0.0129, 'min_z': -0.0706, 'size_x': 0.2824, 'size_y': 0.0258, 'size_z': 0.1412}
