In [3]:
from pipelearn.models.yolo6D.data.dataset import Pose6DDataset, category_id_mapping
from pipelearn.models.yolo6D.data.vis import Visualize, VisualizationConfig, ColorScheme
import matplotlib.pyplot as plt
from pipelearn.embdata.embdata.sense.world_object import WorldObject
from embdata.coordinate import BBox2D
from embdata.sense.camera import Camera, Intrinsics, Extrinsics
from embdata.geometry import Transform3D
import numpy as np
from embdata.sense.world import World
from embdata.sense.image import Image as MBImage
from embdata.sense.object_segmentation_agent import SegmentationAgent

def create_world_from_sample(sample, segmentation_agent=None):
    """Build a ``World`` (image, camera and posed, masked objects) from one dataset sample.

    Args:
        sample: Mapping read with the keys ``'image'``, ``'camera'``, ``'bboxes'``,
            ``'labels'``, ``'rotations'``, ``'tzs'`` and ``'cx_cys'``.
            ``sample['image']`` must support ``.permute(1, 2, 0).numpy()``
            (presumably a channels-first torch tensor -- TODO confirm against
            Pose6DDataset). Boxes are unpacked as ``(cx, cy, w, h)`` and scaled by
            the image width/height, so they are assumed to be normalized to
            [0, 1] -- TODO confirm. The same scaling is applied to ``cx_cys``.
        segmentation_agent: Optional object exposing ``act(image, bbox_2d)``.
            When ``None`` (the default) a fresh ``SegmentationAgent()`` is
            created, which matches the previous behavior. Pass an existing agent
            to reuse it across several samples.

    Returns:
        The populated ``World``. Objects for which the segmentation call raises
        are reported with ``print`` and left out of the world.
    """
    if segmentation_agent is None:
        segmentation_agent = SegmentationAgent()

    # Reorder the image axes with permute(1, 2, 0) and hand NumPy data to the World.
    image = sample['image'].permute(1, 2, 0).numpy()
    img_h, img_w = image.shape[0], image.shape[1]

    world = World(image=MBImage(array=image))

    camera_info = sample['camera']
    intrinsic = camera_info['intrinsic']
    extrinsic = camera_info['extrinsic']

    world.camera = Camera(
        intrinsic=Intrinsics(
            fx=intrinsic['fx'],
            fy=intrinsic['fy'],
            cx=intrinsic['cx'],
            cy=intrinsic['cy'],
        ),
        extrinsic=Extrinsics(
            rotation=extrinsic['rotation'],
            translation=extrinsic['translation'],
        ),
    )

    # The intrinsic matrix depends only on the camera, so build and invert it
    # once instead of once per object.
    K = np.array([
        [intrinsic['fx'], 0, intrinsic['cx']],
        [0, intrinsic['fy'], intrinsic['cy']],
        [0, 0, 1],
    ])
    K_inv = np.linalg.inv(K)

    for box, label, rotation, tz, cx_cy in zip(
        sample['bboxes'],
        sample['labels'],
        sample['rotations'],
        sample['tzs'],
        sample['cx_cys'],
    ):
        # 2D bounding box: centre/size format converted to pixel corner coordinates.
        box_cx, box_cy, box_w, box_h = box
        bbox_2d = BBox2D(
            x1=(box_cx - box_w / 2) * img_w,
            y1=(box_cy - box_h / 2) * img_h,
            x2=(box_cx + box_w / 2) * img_w,
            y2=(box_cy + box_h / 2) * img_h,
        )

        # Object centre in image coordinates, as homogeneous [u, v, 1].
        point_2d = np.array([cx_cy[0] * img_w, cx_cy[1] * img_h, 1])

        # Back-project to 3D: t = tz * K^-1 @ [u, v, 1]^T.
        # `*` and `@` share precedence and group left-to-right, so this is
        # (tz * K_inv) @ point_2d.
        point_3d = tz * K_inv @ point_2d
        translation = np.array([
            point_3d[0].item(),
            point_3d[1].item(),
            point_3d[2].item(),
        ])

        try:
            _, mask = segmentation_agent.act(world.image, bbox_2d)
        except Exception as e:
            # Best effort: skip this object but report why segmentation failed.
            print("Failed for object: ", bbox_2d, "error:", e)
            continue

        wo = WorldObject(
            name=category_id_mapping[str(label.item())]['category_name'],
            bbox_2d=bbox_2d,
            pose=Transform3D(rotation=rotation.numpy(), translation=translation).pose(),
        )
        wo.mask = mask
        world.add_object(wo)

    return world

def visualize_dataset_samples(num_samples=3, split="train"):
    """Display the first ``num_samples`` samples of a ``Pose6DDataset`` split.

    Each sample is converted to a ``World`` with ``create_world_from_sample``,
    annotated by ``Visualize`` (labels, 3D boxes and axes enabled; masks and 2D
    boxes disabled) and shown in its own matplotlib figure.

    Args:
        num_samples: How many samples to show, taken from index 0 upwards.
            Defaults to 3, the previously hard-coded value.
        split: Dataset split passed to ``Pose6DDataset``. Defaults to
            ``"train"``, the previously hard-coded value.

    Returns:
        None. The figures are displayed as a side effect.
    """
    dataset = Pose6DDataset(split=split)
    vis_config = VisualizationConfig(
        masks=False,
        labels=True,
        bbox2d=False,
        bbox3d=True,
        axes=True,
        font_scale=0.7,  # Slightly larger font
        line_thickness=2,
    )
    visualizer = Visualize(config=vis_config)

    for idx in range(num_samples):
        # Build the world for this sample and let the visualizer annotate it.
        world = create_world_from_sample(dataset[idx])
        annotated_image = visualizer.show(world)

        # One figure per sample, without axis ticks.
        plt.figure(figsize=(15, 10))
        plt.imshow(annotated_image)
        plt.axis('off')
        plt.show()

In [6]:
# Render the annotated dataset samples (one matplotlib figure per sample).
visualize_dataset_samples()

Resolving data files:   0%|          | 0/78 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/78 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/72 [00:00<?, ?it/s]

Loaded as API: https://api.mbodi.ai/sense/ ✔


Object: glue
Pose: Pose6D(
│   [36m'x[36m': [36m17.338592367339515[0m,
│   [36m'y[36m': [36m56.3638763046614[0m,
│   [36m'z[36m': [36m1472.7637939453125[0m,
│   [36m'roll[36m': [36m0.0[0m,
│   [36m'pitch[36m': [36m0.0[0m,
│   [36m'yaw[36m': [36m0.0[0m
)
Bbox 3D: None
Bbox 2D: BBox2D([36m'x1[36m': [36m330.5[0m, [36m'y1[36m': [36m249.5[0m, [36m'x2[36m': [36m333.5000305175781[0m, [36m'y2[36m': [36m278.5[0m)
