In [None]:
from pathlib import Path

# Root data directory; the next cell searches it recursively for "sa*.json" files.
path = Path("./data")


In [None]:
import json
from pathlib import Path
from collections import OrderedDict

# Merge every "sa*.json" found under `path` into a single dict keyed by record id,
# then write the result sorted by numeric prompt id.
output = Path("./data") / "lincheng"
merged_file = output / "saved_data.json"

file_contents = {}
# Sorted so that, when two files share a record key, the file that wins is
# deterministic (rglob order depends on the filesystem).
for json_f in sorted(path.rglob("sa*.json")):
    # The merged output itself matches "sa*.json"; skip it so that re-running
    # this cell does not feed stale results back into the merge.
    if json_f.resolve() == merged_file.resolve():
        continue
    json_contents = json.loads(json_f.read_text())
    print(len(json_contents))
    file_contents.update(json_contents)

# Sort the file_contents by the 'prompt' field (numeric, not lexical, order)
sorted_contents = OrderedDict(
    sorted(file_contents.items(), key=lambda item: int(item[1]['prompt']))
)

output.mkdir(parents=True, exist_ok=True)
with open(merged_file, "w") as f:
    json.dump(sorted_contents, f, indent=4)


# YOLO fine-tuning using a different dataset

In [None]:
from tqdm import tqdm

from ultralytics.utils.checks import check_requirements
from ultralytics.utils.downloads import download
from ultralytics.utils.ops import xyxy2xywhn

import numpy as np
from pathlib import Path

check_requirements(('pycocotools>=2.0',))
from pycocotools.coco import COCO

# Make Directories
# Named `dataset_dir` (not `dir`) so the builtin dir() is not shadowed for the
# rest of the notebook.
dataset_dir = Path('/home/capre/disk_4/yutao/ultralytics/datasets/Objects365')  # dataset root dir
for p in 'images', 'labels':
    for q in 'train', 'val':
        (dataset_dir / p / q).mkdir(parents=True, exist_ok=True)

# Train, Val Splits
for split, patches in [('train', 50 + 1), ('val', 43 + 1)]:
    print(f"Processing {split} in {patches} patches ...")
    images, labels = dataset_dir / 'images' / split, dataset_dir / 'labels' / split

    # Download
    url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/"
    if split == 'train':
        download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dataset_dir)  # annotations json
        download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, threads=8)
    elif split == 'val':
        download([f'{url}zhiyuan_objv2_{split}.json'], dir=dataset_dir)  # annotations json
        download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, threads=8)
        download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, threads=8)

    # Move
    for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'):
        f.rename(images / f.name)  # move to /images/{split}

    # Labels
    # One pass per category; each image's label file is opened in append mode,
    # so running this cell twice duplicates label lines. Clear labels/{split}
    # before a re-run.
    coco = COCO(dataset_dir / f'zhiyuan_objv2_{split}.json')
    names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
    for cid, cat in enumerate(names):
        catIds = coco.getCatIds(catNms=[cat])
        imgIds = coco.getImgIds(catIds=catIds)
        for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
            width, height = im["width"], im["height"]
            # Local name chosen so the notebook-level `path` variable used by
            # other cells is not overwritten.
            label_file = labels / Path(im["file_name"]).with_suffix('.txt').name
            try:
                with open(label_file, 'a') as file:
                    annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
                    for a in coco.loadAnns(annIds):
                        x, y, w, h = a['bbox']  # bounding box in xywh (xy top-left corner)
                        xyxy = np.array([x, y, x + w, y + h])[None]  # pixels(1,4)
                        x, y, w, h = xyxy2xywhn(xyxy, w=width, h=height, clip=True)[0]  # normalized and clipped
                        file.write(f"{cid} {x:.5f} {y:.5f} {w:.5f} {h:.5f}\n")
            except Exception as e:
                # Best effort: keep converting the remaining images, but say which one failed.
                print(f"Failed to write labels for {im['file_name']}: {e}")


In [None]:
import os
from ultralytics import YOLO
# Visual sanity check: run the checkpoint from runs/segment/train6 on one frame.
# (Optional) os.chdir("/home/capre/disk_4/yutao/ultralytics") beforehand.
model = YOLO(
    "/home/capre/Point-Cloud-Stream/runs/segment/train6/weights/best.pt"
)

# Training is disabled in this cell; uncomment to fine-tune on Objects365:
# results = model.train(data="Objects365.yaml", epochs=100, imgsz=640)

# Predict quietly, without writing any result files, then display the first result.
image_path = '/home/capre/disk_4/yutao/data/resources/0a7ee4d0cf344e15a81c68be0be1fb96_color_2.png'
res = model.predict(
    source=image_path,
    save=False,
    verbose=False,
)
res[0].show()

# YOLO fine-tuning for breast segmentation

In [None]:
from ultralytics import YOLO

# Start from the pretrained YOLO11x segmentation weights.
model = YOLO("yolo11x-seg.pt")

# Fine-tune for 100 epochs at 640 px on the dataset described by dataset3/dataset.yaml.
results = model.train(
    data="/home/capre/disk_4/yutao/breast-seg/dataset3/dataset.yaml",
    epochs=100,
    imgsz=640,
)

# Align mask filenames (zero-pad numeric names)

In [None]:
import os

def add_leading_zeros_to_masks(masks_dir, total_digits=4):
    """
    Zero-pad purely numeric file names in ``masks_dir`` to a fixed width.

    For example ``7.png`` becomes ``0007.png`` with the default width. Entries
    whose stem is not an integer are reported and left alone. A file is never
    renamed onto an existing entry: if the padded name is already taken (for
    instance both ``1.png`` and ``0001.png`` exist) the source is reported and
    skipped instead of overwriting the other file.

    Parameters:
    - masks_dir: path of the folder containing the mask files
    - total_digits: number of digits the numeric stem is padded to (default 4)
    """
    for filename in os.listdir(masks_dir):
        # Split stem and extension; only the stem is padded.
        basename, extension = os.path.splitext(filename)
        try:
            # int() doubles as the "is this name numeric?" check.
            number = int(basename)
        except ValueError:
            print(f"跳过非数字文件名：{filename}")
            continue

        new_filename = f"{number:0{total_digits}d}" + extension
        if new_filename == filename:
            # Already padded; nothing to do.
            continue

        src = os.path.join(masks_dir, filename)
        dst = os.path.join(masks_dir, new_filename)
        if os.path.exists(dst):
            # os.rename would silently replace dst on POSIX and lose a mask.
            print(f"跳过（目标已存在）：{filename} -> {new_filename}")
            continue

        os.rename(src, dst)
        print(f"重命名：{filename} -> {new_filename}")

# Example usage:
masks_dir = '/home/capre/disk_4/yutao/breast-seg/masks'  # replace with the path to your masks folder
add_leading_zeros_to_masks(masks_dir)


# yolo dataset make

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import random
import shutil

# Define paths
images_dir = '/home/capre/disk_4/yutao/breast-seg/images'  # Replace with your images directory
masks_dir = '/home/capre/disk_4/yutao/breast-seg/masks'      # Replace with your masks directory
dataset_root = '/home/capre/disk_4/yutao/breast-seg/dataset2'  # Replace with your dataset root directory

# Create the images/{train,val} and labels/{train,val} directories
for subset_kind in ('images', 'labels'):
    for subset_phase in ('train', 'val'):
        os.makedirs(os.path.join(dataset_root, subset_kind, subset_phase), exist_ok=True)

# Get list of image files. Sorted because os.listdir order is arbitrary; the
# extension check is case-insensitive so ".PNG" etc. are picked up too.
image_files = sorted(
    f for f in os.listdir(images_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

# Shuffle with a fixed seed so the 80/20 train/val split is the same on every run.
SPLIT_SEED = 42
random.Random(SPLIT_SEED).shuffle(image_files)
split_index = int(len(image_files) * 0.8)
train_files = image_files[:split_index]
val_files = image_files[split_index:]

def _contour_to_label_line(contour, class_id, width, height):
    """Turn one OpenCV contour into a YOLO segmentation label line, or None if it is not a polygon."""
    # Simplify the outline; tolerance is 0.1% of the contour perimeter.
    epsilon = 0.001 * cv2.arcLength(contour, True)
    polygon = cv2.approxPolyDP(contour, epsilon, True).reshape(-1, 2)
    if len(polygon) < 3:
        # One or two vertices do not enclose an area, so there is nothing to label.
        return None

    # Normalize pixel coordinates to [0, 1] by the size of the source raster.
    normalized = polygon.astype(np.float32)
    normalized[:, 0] /= width
    normalized[:, 1] /= height
    return f"{class_id} " + ' '.join(map(str, normalized.flatten().tolist()))


def process_dataset(phase, files):
    """
    Write YOLO segmentation labels for ``files`` and copy the images into the dataset.

    For every image name, the mask with the same file name is read from
    ``masks_dir``, thresholded at 127, and each external contour is written as
    one class-0 polygon line to ``labels/<phase>/<stem>.txt`` under
    ``dataset_root``. The image itself is copied to ``images/<phase>/``.
    Images without a readable mask are reported and skipped.

    Parameters:
    - phase: sub-directory name, 'train' or 'val'
    - files: iterable of image file names located in ``images_dir``
    """
    for image_file in files:
        image_path = os.path.join(images_dir, image_file)
        mask_path = os.path.join(masks_dir, image_file)  # Assuming mask has the same name
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread returns None when the file is missing or unreadable
        if mask is None:
            print(f"Mask not found for image {image_file}")
            continue

        height, width = mask.shape
        # Threshold mask to binary
        _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)

        # Outer outlines only; holes inside a region are ignored
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        label_file = os.path.splitext(image_file)[0] + '.txt'
        label_path = os.path.join(dataset_root, 'labels', phase, label_file)

        with open(label_path, 'w') as f:
            for contour in contours:
                # Class index is 0 for every polygon in this dataset
                line = _contour_to_label_line(contour, 0, width, height)
                if line is not None:
                    f.write(line + '\n')

        # Copy image to dataset folder
        shutil.copy(image_path, os.path.join(dataset_root, 'images', phase, image_file))

# Convert both splits
for split_name, split_files in (('train', train_files), ('val', val_files)):
    process_dataset(split_name, split_files)

# Describe the dataset for the trainer: root path, split folders and the single class
dataset_yaml = os.path.join(dataset_root, 'dataset.yaml')
yaml_lines = [
    f"path: {dataset_root}\n",
    "train: images/train\n",
    "val: images/val\n",
    "test: \n\n",
    "names:\n",
    "  0: object\n",
]
with open(dataset_yaml, 'w') as f:
    f.writelines(yaml_lines)


# add label from predict

In [None]:
import os
import cv2
import numpy as np
import random
import shutil
from ultralytics import YOLO

# Define paths
images_dir = '/home/capre/disk_4/yutao/breast-seg/images'  # Replace with your images directory
masks_dir = '/home/capre/disk_4/yutao/breast-seg/masks'      # Replace with your masks directory
dataset_root = '/home/capre/disk_4/yutao/breast-seg/dataset3'  # Replace with your dataset root directory

# Create the images/{train,val} and labels/{train,val} directories
for subset_kind in ('images', 'labels'):
    for subset_phase in ('train', 'val'):
        os.makedirs(os.path.join(dataset_root, subset_kind, subset_phase), exist_ok=True)

# Get list of image files. Sorted because os.listdir order is arbitrary; the
# extension check is case-insensitive so ".PNG" etc. are picked up too.
image_files = sorted(
    f for f in os.listdir(images_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

# Shuffle with a fixed seed so the 80/20 train/val split is the same on every run.
SPLIT_SEED = 42
random.Random(SPLIT_SEED).shuffle(image_files)
split_index = int(len(image_files) * 0.8)
train_files = image_files[:split_index]
val_files = image_files[split_index:]

# Load the YOLO model
model = YOLO('yolo11x-seg.pt')  # Replace with the correct path to your model

# Get class names and assign a new class ID for your mask label.
# Work on a copy: adding our class must not modify the model's own name mapping.
class_names = dict(model.names)  # {class_id: class_name}
max_class_id = max(class_names.keys())
mask_class_id = max_class_id + 1
class_names[mask_class_id] = 'breast'  # Replace 'breast' with your class name

def _contour_to_label_line(contour, class_id, width, height):
    """Turn one OpenCV contour into a YOLO segmentation label line, or None if it is not a polygon."""
    # Simplify the outline; tolerance is 0.1% of the contour perimeter.
    epsilon = 0.001 * cv2.arcLength(contour, True)
    polygon = cv2.approxPolyDP(contour, epsilon, True).reshape(-1, 2)
    if len(polygon) < 3:
        # One or two vertices do not enclose an area, so there is nothing to label.
        return None

    # Normalize pixel coordinates to [0, 1] by the size of the source raster.
    normalized = polygon.astype(np.float32)
    normalized[:, 0] /= width
    normalized[:, 1] /= height
    return f"{class_id} " + ' '.join(map(str, normalized.flatten().tolist()))


def process_dataset(phase, files):
    """
    Build labels that combine model predictions with the hand-made mask.

    For every image name in ``files``:
    1. the image and the same-named mask are read (unreadable ones are
       reported and skipped);
    2. ``model`` is run on the image and every predicted instance mask is
       written as polygons carrying the predicted class id;
    3. the hand-made mask is thresholded at 127 and its external contours are
       written as polygons with ``mask_class_id``;
    4. the image is copied to ``images/<phase>/`` under ``dataset_root``.

    Parameters:
    - phase: sub-directory name, 'train' or 'val'
    - files: iterable of image file names located in ``images_dir``
    """
    for image_file in files:
        image_path = os.path.join(images_dir, image_file)
        mask_path = os.path.join(masks_dir, image_file)  # Assuming mask has the same name
        image = cv2.imread(image_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread returns None when a file is missing or unreadable
        if mask is None:
            print(f"Mask not found for image {image_file}")
            continue
        if image is None:
            print(f"Could not read image {image_file}")
            continue

        original_height, original_width = image.shape[:2]
        # The mask is normalized by its own size, so a mask stored at a
        # different resolution than the image still yields correct [0, 1] coordinates.
        mask_height, mask_width = mask.shape

        # Run the model to get predictions
        results = model.predict(source=image, save=False, verbose=False)
        result = results[0]

        label_lines = []

        # Polygons from the model's predicted instance masks
        if hasattr(result, 'masks') and result.masks is not None:
            pred_masks = result.masks.data.cpu().numpy()
            pred_classes = result.boxes.cls.cpu().numpy().astype(int)
            for mask_pred, class_id in zip(pred_masks, pred_classes):
                mask_pred = (mask_pred > 0.5).astype(np.uint8) * 255

                # NOTE(review): masks.data may be at the padded inference
                # resolution; a plain resize assumes no letterbox padding.
                # Confirm, or use result.masks.xyn instead.
                mask_pred_resized = cv2.resize(
                    mask_pred, (original_width, original_height), interpolation=cv2.INTER_NEAREST
                )
                contours, _ = cv2.findContours(mask_pred_resized, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                label_lines.extend(
                    _contour_to_label_line(contour, class_id, original_width, original_height)
                    for contour in contours
                )
        else:
            print(f"No predicted masks for image {image_file}")

        # Polygons from the hand-made mask, labelled with the extra class id
        _, mask_bin = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        label_lines.extend(
            _contour_to_label_line(contour, mask_class_id, mask_width, mask_height)
            for contour in contours
        )

        # Create label file (degenerate contours came back as None and are dropped)
        label_file = os.path.splitext(image_file)[0] + '.txt'
        label_path = os.path.join(dataset_root, 'labels', phase, label_file)
        with open(label_path, 'w') as f:
            f.writelines(line + '\n' for line in label_lines if line is not None)

        # Copy image to dataset folder
        shutil.copy(image_path, os.path.join(dataset_root, 'images', phase, image_file))

# Convert both splits
for split_name, split_files in (('train', train_files), ('val', val_files)):
    process_dataset(split_name, split_files)

# Describe the dataset for the trainer: root path, split folders and every class name
dataset_yaml = os.path.join(dataset_root, 'dataset.yaml')
yaml_lines = [
    f"path: {dataset_root}\n",
    "train: images/train\n",
    "val: images/val\n",
    "test: \n\n",
    "names:\n",
]
# One "  <id>: <name>" entry per class, in ascending id order
yaml_lines += [f"  {class_id}: {class_names[class_id]}\n" for class_id in sorted(class_names.keys())]
with open(dataset_yaml, 'w') as f:
    f.writelines(yaml_lines)


# LEO Dataset Processing Pipeline

# Original data processing (note: the xyz and rxyz pose components are stored in inverted order)

In [None]:
from pathlib import Path
import json
import numpy as np
from scipy.spatial.transform import Rotation as R
import pandas as pd
# Dataset root: searched below for "*saved_data.json" files and expected to contain a "resources" folder.
data_path = '/home/capre/disk_4/yutao/leo_data/data_2nd'


def switch_xyzrxyz(rxyzxyz:list):
    """Swap the two 3-element halves of a 6-element pose.

    Elements 0..2 and 3..5 trade places, i.e. ``[a0, a1, a2, b0, b1, b2]``
    becomes ``[b0, b1, b2, a0, a1, a2]``. Returns a plain Python list.
    """
    first_half = rxyzxyz[:3]
    second_half = rxyzxyz[3:6]
    return np.concatenate((second_half, first_half)).tolist()

# print(switch_xyzrxyz([1,2,3,4,5,6]))

def transform_pose(pose: list, T_cam_to_base):
    """Re-express a 6-DoF pose through the inverse of ``T_cam_to_base``.

    Parameters:
    - pose: ``[x, y, z, rx, ry, rz]``; the last three are 'xyz' Euler angles in radians.
    - T_cam_to_base: 4x4 homogeneous transform; its inverse is applied on the left.

    Returns a list ordered ``[rx, ry, rz, x, y, z]``. Note that the rotation
    comes first in the result, unlike in the input.
    """
    pose_vec = np.array(pose)
    translation, euler_xyz = pose_vec[:3], pose_vec[3:6]

    # 4x4 homogeneous matrix of the input pose
    pose_matrix = np.eye(4)
    pose_matrix[:3, :3] = R.from_euler('xyz', euler_xyz.reshape(1, 3), degrees=False).as_matrix()[0]
    pose_matrix[:3, 3] = translation

    # Bring the pose into the frame on the other side of T_cam_to_base
    transformed = np.linalg.inv(T_cam_to_base) @ pose_matrix

    euler_out = R.from_matrix(transformed[:3, :3]).as_euler('xyz', degrees=False)
    translation_out = transformed[:3, 3]
    return np.concatenate((euler_out, translation_out)).tolist()


path = Path(data_path)
resources_path: Path = path / 'resources'
all_data = {}
x_values, y_values, z_values = [], [], []


import re
# Matches a run of digits at the very end of a prompt string
pattern = re.compile(r'(\d+$)')


for json_f in path.rglob("*saved_data.json"):
    # Close the file promptly instead of leaving the handle to the garbage collector
    with open(json_f) as json_fh:
        saved_data = json.load(json_fh)

    for each_record, record in saved_data.items():
        # Strip trailing digits from the prompt (and show the stripped result)
        prompt = record['prompt']
        if pattern.search(prompt):
            print(pattern.sub("", prompt))
            record['prompt'] = pattern.sub("", prompt)

        # A record whose colour frames are not all present under resources/ ends
        # processing of this file: records already accepted are kept, this one
        # and the remaining ones are dropped.
        if not all(resources_path.joinpath(each_image).exists() for each_image in record['color_files']):
            break

        # (Optional, currently disabled) switch_xyzrxyz could be applied here to
        # record['pose'][i] and to record['poses'][i]['pose0'/'pose1'].

        # Collect the first three pose components for the range summary below
        for pose in record['pose']:
            x_values.append(pose[0])
            y_values.append(pose[1])
            z_values.append(pose[2])
        all_data[each_record] = record

# Compute min and max for x, y, z
if not x_values:
    raise ValueError(f"No pose data collected from '*saved_data.json' files under {path}")
min_x, max_x = min(x_values), max(x_values)
min_y, max_y = min(y_values), max(y_values)
min_z, max_z = min(z_values), max(z_values)

# Print the results
print(f"Min x: {min_x}, Max x: {max_x}")
print(f"Min y: {min_y}, Max y: {max_y}")
print(f"Min z: {min_z}, Max z: {max_z}")


# map id to real prompts

In [None]:
prompts_map = pd.read_csv(path / 'task_list.csv')
prompts_map['ID'] = prompts_map['ID'].astype(int).astype(str)

# Build the ID -> prompt lookup once (first row wins for a duplicated ID)
# instead of filtering the whole frame for every record.
id_to_prompt = prompts_map.drop_duplicates('ID').set_index('ID')['Prompts'].to_dict()
known_prompts = set(id_to_prompt.values())

for each_record in all_data:
    prompt = all_data[each_record]['prompt']
    if prompt in id_to_prompt:
        all_data[each_record]['prompt'] = id_to_prompt[prompt]
    elif prompt not in known_prompts:
        # Neither an ID nor an already-mapped prompt: fail with a message that names the record.
        raise KeyError(f"Record {each_record!r}: prompt ID {prompt!r} is not listed in task_list.csv")
    # else: the prompt was already replaced by an earlier run of this cell; leave it as is.

# select data

In [None]:
import random

# Fixed seed so the shuffled order and both subsets are reproducible across runs.
SAMPLE_SEED = 42
sample_rng = random.Random(SAMPLE_SEED)


def _sample_and_save(records, count, file_name):
    """Draw `count` records at random (without replacement), write them to `path / file_name`, return them."""
    chosen_keys = sample_rng.sample(list(records.keys()), count)
    subset = {key: records[key] for key in chosen_keys}
    print(len(subset))
    # `with` guarantees the JSON is flushed and the handle closed.
    with open(path / file_name, 'w') as out_fh:
        json.dump(subset, out_fh, indent=4)
    return subset


# Sampling every key is a shuffle: all_data is replaced by a randomly ordered copy.
all_data = _sample_and_save(all_data, len(all_data), 'all_data.json')

# Half and quarter subsets are both drawn independently from the full set.
half_data = _sample_and_save(all_data, len(all_data) // 2, 'half_data.json')
half_half_data = _sample_and_save(all_data, len(all_data) // 2 // 2, 'half_half_data.json')

In [None]:
import cv2
# Scratch cell: the bare expression only displays the function object as the cell output.
cv2.goodFeaturesToTrack

In [None]:
import plyfile
# Read one recorded point-cloud PLY file into `data`.
data = plyfile.PlyData.read('/home/capre/disk_4/yutao/data/resources/0a7ee4d0cf344e15a81c68be0be1fb96_point_cloud_1.ply')

In [None]:
# ! python launch.py task=tuning_vla note=tuning_vla pretrained_ckpt_path=/home/capre/disk_4/yutao/leo/ckpts clip_txt_guidance.flag=True

# recalculate the segmentation

In [None]:
import os
from ultralytics import YOLO
from plyfile import PlyElement, PlyData
import open3d as o3d
import open3d.core as o3c
# os.chdir("/home/capre/disk_4/yutao/ultralytics")
# Load the segmentation checkpoint used to label the point clouds
model = YOLO("/home/capre/Point-Cloud-Stream/runs/segment/train6/weights/best.pt")  # weights from runs/segment/train6
# Open3D tensors created below are placed on the first CUDA device
o3d_device = o3d.core.Device("CUDA:0")
from utils.segmentation import segment_pcd_from_2d
# 3x3 camera matrix, written row by row and then transposed. After `.T` the third
# ROW holds 639.89..., 358.38..., 1.0 (presumably cx, cy, 1) and the entries
# intrinsic[0, 2] and intrinsic[1, 2] are 0.0.
# NOTE(review): confirm that both Open3D and segment_pcd_from_2d expect this
# transposed layout; the later "batch process" cell uses the untransposed matrix.
# This cell also relies on `np`, `all_data` and `resources_path` from earlier cells.
intrinsic = np.array([
    [
      610.5961520662408,
      0.0,
      639.8919938587554
    ],
    [
      0.0,
      617.4130735412369,
      358.3889735843055
    ],
    [
      0.0,
      0.0,
      1.0
    ]
  ]).T

# For every recorded frame: rebuild the point cloud from depth + colour with
# Open3D, label each point with segment_pcd_from_2d, and overwrite the frame's PLY.
for idxx, each_record in enumerate(all_data):
    print(f"Processing {idxx}/{len(all_data)}: {each_record}")
    record = all_data[each_record]
    for idx, color_name in enumerate(record['color_files']):
        depth_path = resources_path / record['depth_files'][idx]
        color_path = resources_path / color_name

        # Depth is stored as a .npy array, colour as an image file
        depth = o3d.t.geometry.Image.from_legacy(o3d.geometry.Image(np.load(str(depth_path))))
        color = o3d.t.geometry.Image.from_legacy(o3d.io.read_image(str(color_path)))
        rgbd_image = o3d.t.geometry.RGBDImage(color, depth)

        # Positional arguments after the extrinsics are presumably depth_scale,
        # depth_max, stride and with_normals. TODO confirm against the Open3D docs.
        # NOTE(review): `intrinsic` is the transposed camera matrix defined above;
        # confirm Open3D expects that layout.
        pcd_frame = o3d.t.geometry.PointCloud.create_from_rgbd_image(
                    rgbd_image,
                    o3c.Tensor(intrinsic, dtype=o3c.Dtype.Float32, device=o3d_device),
                    o3c.Tensor(np.eye(4), dtype=o3c.Dtype.Float32, device=o3d_device),
                    1000, 3,
                    2, False)

        # Convert once and reuse (the conversion copies the whole cloud)
        legacy_pcd = pcd_frame.to_legacy()
        xyz = np.asarray(legacy_pcd.points)
        rgb = np.asarray(legacy_pcd.colors)

        label = segment_pcd_from_2d(model, pcd_frame, color_path, intrinsic)
        segment_ids = np.asarray(label).reshape(-1)
        if not (len(xyz) == len(rgb) == len(segment_ids)):
            raise ValueError(
                f"{color_path}: {len(xyz)} points, {len(rgb)} colours, {len(segment_ids)} labels"
            )

        # Legacy Open3D colours are floats in [0, 1]; the PLY stores them as
        # unsigned bytes, so scale to 0..255 first (a direct cast truncates to 0/1).
        rgb_u8 = np.clip(np.rint(rgb * 255.0), 0, 255).astype(np.uint8)

        # Fill the structured vertex array column by column instead of building
        # one Python tuple per point.
        vertex = np.empty(
            len(xyz),
            dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('red', 'u1'), ('green', 'u1'), ('blue', 'u1'),
                   ('segment_id', 'i4')])
        vertex['x'], vertex['y'], vertex['z'] = xyz[:, 0], xyz[:, 1], xyz[:, 2]
        vertex['red'], vertex['green'], vertex['blue'] = rgb_u8[:, 0], rgb_u8[:, 1], rgb_u8[:, 2]
        vertex['segment_id'] = segment_ids

        ply_name = resources_path / record['point_cloud_files'][idx]
        ply = PlyData([PlyElement.describe(vertex, 'vertex')], text=True)
        ply.write(str(ply_name))

    # break



In [None]:
# Visual sanity check: run the checkpoint from runs/segment/train6 on one frame.
model = YOLO(
    "/home/capre/Point-Cloud-Stream/runs/segment/train6/weights/best.pt"
)

# Training is disabled in this cell; uncomment to fine-tune on Objects365:
# results = model.train(data="Objects365.yaml", epochs=100, imgsz=640)

# Predict quietly, without writing any result files, then display the first result.
image_path =  '/home/capre/disk_4/yutao/data/resources/200a4ee908534e36a0aae0e39e2c8854_color_3.png'
res = model.predict(
    source=image_path,
    save=False,
    verbose=False,
)
res[0].show()


# numpy seg

In [None]:
import os
from ultralytics import YOLO
import numpy as np
from plyfile import PlyElement, PlyData
import open3d as o3d
import open3d.core as o3c
# Dataset root and the folder holding depth / colour / point-cloud files.
# (`Path` and `all_data` come from earlier cells.)
path = Path(r"/home/capre/disk_4/yutao/leo_data/data_2nd")
resources_path = path / 'resources'
# os.chdir("/home/capre/disk_4/yutao/ultralytics")
# Load the segmentation checkpoint used to label the point clouds
model = YOLO("/home/capre/Point-Cloud-Stream/runs/segment/train6/weights/best.pt")
o3d_device = o3d.core.Device("CUDA:0")
from app.utils.camera.segmentation_utils import segment_pcd_from_2d
# 3x3 camera matrix, written row by row and then transposed. After `.T`,
# intrinsic[0, 2] and intrinsic[1, 2] are both 0.0, and the values 639.89... and
# 358.38... sit at intrinsic[2, 0] and intrinsic[2, 1].
# NOTE(review): the loop below reads cx, cy from intrinsic[0, 2], intrinsic[1, 2];
# confirm whether the transpose is intended here and what segment_pcd_from_2d expects.
intrinsic = np.array([
    [
      610.5961520662408,
      0.0,
      639.8919938587554
    ],
    [
      0.0,
      617.4130735412369,
      358.3889735843055
    ],
    [
      0.0,
      0.0,
      1.0
    ]
  ]).T

# For every recorded frame: back-project the depth map with NumPy, label each
# point with segment_pcd_from_2d, and overwrite the frame's PLY file.
for idxx, each_record in enumerate(all_data):
    print(f"Processing {idxx}/{len(all_data)}: {each_record}")
    record = all_data[each_record]

    for idx, color_name in enumerate(record['color_files']):
        depth_path = resources_path / record['depth_files'][idx]
        color_path = resources_path / color_name

        # Load depth and color images
        depth = np.load(str(depth_path))
        color = np.asarray(o3d.io.read_image(str(color_path)))

        # A 3-channel depth array is decoded as channel0 + 256*channel1 + 65536*channel2
        if depth.ndim == 3 and depth.shape[-1] == 3:
            if np.issubdtype(depth.dtype, np.integer):
                # Widen first: with narrow integer dtypes the multiplications
                # depend on NumPy's promotion rules and can overflow or raise.
                depth = depth.astype(np.int64)
            depth = depth[:, :, 0] + depth[:, :, 1] * 256 + depth[:, :, 2] * 65536

        height, width = depth.shape
        fx, fy = intrinsic[0, 0], intrinsic[1, 1]
        # NOTE(review): `intrinsic` in this cell is transposed, so these two
        # entries are 0.0 (the principal point sits at intrinsic[2, 0] and
        # intrinsic[2, 1]). Left unchanged because segment_pcd_from_2d receives
        # the same matrix and its expectations are not visible here. Confirm.
        cx, cy = intrinsic[0, 2], intrinsic[1, 2]

        # Pinhole back-projection of every pixel (depth used in its stored units)
        cols, rows = np.meshgrid(np.arange(width), np.arange(height))
        valid_mask = depth > 0  # pixels without a depth reading are dropped
        xyz = np.stack(
            ((cols - cx) * depth / fx, (rows - cy) * depth / fy, depth), axis=-1
        )[valid_mask]
        rgb = color[valid_mask]

        # Segment point cloud
        label = segment_pcd_from_2d(model, xyz, color_path, intrinsic)
        segment_ids = np.asarray(label).reshape(-1)
        if not (len(xyz) == len(rgb) == len(segment_ids)):
            raise ValueError(
                f"{color_path}: {len(xyz)} points, {len(rgb)} colours, {len(segment_ids)} labels"
            )

        # Fill the structured vertex array column by column instead of building
        # one Python tuple per point. Colours are stored as floats in [0, 1].
        rgb_unit = rgb / 255.0
        vertex = np.empty(
            len(xyz),
            dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('red', 'f4'), ('green', 'f4'), ('blue', 'f4'), ('segment_id', 'i4')]
        )
        vertex['x'], vertex['y'], vertex['z'] = xyz[:, 0], xyz[:, 1], xyz[:, 2]
        vertex['red'], vertex['green'], vertex['blue'] = rgb_unit[:, 0], rgb_unit[:, 1], rgb_unit[:, 2]
        vertex['segment_id'] = segment_ids

        # Save as PLY
        ply_name = resources_path / record['point_cloud_files'][idx]
        ply = PlyData([PlyElement.describe(vertex, 'vertex')], text=True)
        ply.write(str(ply_name))

    # Uncomment to debug with a single record
    # break


# batch process seg pcd from yolo

In [None]:
import os
from pathlib import Path
from ultralytics import YOLO
import numpy as np
from plyfile import PlyElement, PlyData
import open3d as o3d
import open3d.core as o3c
import torch
import cv2
import torch.utils.dlpack
from typing import List
from tqdm import tqdm

# Initialize YOLO model (segmentation checkpoint from runs/segment/train6)
model = YOLO("/home/capre/Point-Cloud-Stream/runs/segment/train6/weights/best.pt")

# Open3D device handle; not referenced by the functions defined in this cell
o3d_device = o3d.core.Device("CUDA:0")

# Intrinsic matrix in row-major form. The code below reads fx, fy from
# [0, 0], [1, 1] and cx, cy from [0, 2], [1, 2]. Unlike the two earlier
# segmentation cells, this matrix is NOT transposed.
intrinsic = np.array([
    [610.5961520662408, 0.0,               639.8919938587554],
    [0.0,               617.4130735412369, 358.3889735843055],
    [0.0,               0.0,               1.0]
], dtype=np.float32)


def o3d_t_to_torch(o3d_t):
    """Convert an object exposing ``to_dlpack()`` (e.g. an Open3D tensor) into a torch tensor via DLPack."""
    capsule = o3d_t.to_dlpack()
    from_dlpack = torch.utils.dlpack.from_dlpack
    return from_dlpack(capsule)

def _labels_from_masks(res, pcd, image_hw, intrinsic_mat, extrinsic_mat, threshold, device):
    """Project one point cloud into its image and read a class label per point from the predicted masks.

    Returns an int32 array of shape (N,), where -1 marks points covered by no mask.
    """
    num_points = pcd.shape[0]
    # Every point starts as "unlabeled" (-1); all exit paths return this same
    # convention and dtype, so class 0 is never confused with "no label".
    point_labels = torch.full((num_points,), -1, dtype=torch.int32, device=device)

    # No detections for this image
    if res.masks is None or len(res.masks) == 0:
        return point_labels.cpu().numpy()

    masks = res.masks.data.to(device)  # (num_masks, H', W')
    labels_per_mask = res.boxes.cls.to(device)  # (num_masks,)

    H, W = image_hw
    # Resize masks to the size of the full colour image.
    # NOTE(review): this assumes the mask tensor covers exactly the image area
    # (no letterbox padding at inference resolution). Confirm for this model/imgsz.
    masks_resized = torch.nn.functional.interpolate(
        masks.unsqueeze(1).float(),
        size=(H, W),
        mode='bilinear',
        align_corners=False
    ).squeeze(1)  # (num_masks, H, W)

    pcd_torch = torch.from_numpy(pcd).to(device, dtype=torch.float32)
    intrinsic_torch = torch.from_numpy(intrinsic_mat).to(device, dtype=torch.float32)
    extrinsic_torch = torch.from_numpy(extrinsic_mat).to(device, dtype=torch.float32)

    # Transform to camera coordinates using homogeneous points
    ones = torch.ones((num_points, 1), device=device, dtype=torch.float32)
    points_3d_hom = torch.cat([pcd_torch, ones], dim=1)  # (N,4)
    points_cam = (extrinsic_torch @ points_3d_hom.T).T[:, :3]

    # Only points in front of the camera can be projected
    valid_depth = points_cam[:, 2] > 0
    points_cam = points_cam[valid_depth]
    indices = torch.nonzero(valid_depth).squeeze(1)

    fx = intrinsic_torch[0, 0]
    fy = intrinsic_torch[1, 1]
    cx = intrinsic_torch[0, 2]
    cy = intrinsic_torch[1, 2]

    # Pinhole projection to integer pixel coordinates
    x_cam = points_cam[:, 0]
    y_cam = points_cam[:, 1]
    z_cam = points_cam[:, 2]
    u = torch.round((x_cam * fx / z_cam) + cx).long()
    v = torch.round((y_cam * fy / z_cam) + cy).long()

    in_bounds = (u >= 0) & (u < W) & (v >= 0) & (v < H)
    u = u[in_bounds]
    v = v[in_bounds]
    valid_indices = indices[in_bounds]

    # Nothing lands inside the image: every point stays unlabeled
    if u.numel() == 0:
        return point_labels.cpu().numpy()

    # For each projected point take the mask with the highest score at its pixel
    mask_values = masks_resized[:, v, u]  # (num_masks, M)
    mask_scores, mask_ids = torch.max(mask_values, dim=0)

    valid_mask_points = mask_scores > threshold
    final_indices = valid_indices[valid_mask_points]
    point_labels[final_indices] = labels_per_mask[mask_ids[valid_mask_points]].int()

    return point_labels.cpu().numpy()


def batch_segment_pcd_from_2d(
    model: YOLO,
    pcds: List[np.ndarray],
    rgbs: List[np.ndarray],
    full_images: List[np.ndarray],
    intrinsics: List[np.ndarray],
    extrinsics: List[np.ndarray],
    threshold: float = 0.5,
    device: str = 'cuda',
    batch_size: int = 128
):
    """
    Perform segmentation for a large set of point clouds and associated color images in batches.

    Parameters
    ----------
    model : YOLO
        A YOLO segmentation model.
    pcds : list of numpy arrays
        Each is an Nx3 point cloud array (masked by valid depth).
    rgbs : list of numpy arrays
        Each is an Nx3 RGB array corresponding to pcds. Not used for labelling;
        accepted so callers can pass their parallel lists unchanged.
    full_images : list of numpy arrays
        Each is a full color image (HxWx3, uint8) for YOLO segmentation input.
    intrinsics : list of numpy arrays
        Each is a 3x3 intrinsic matrix (fx, fy at [0,0], [1,1]; cx, cy at [0,2], [1,2]).
    extrinsics : list of numpy arrays
        Each is a 4x4 extrinsic matrix applied to the points before projection.
    threshold : float
        Threshold for mask scores.
    device : str
        'cpu' or 'cuda'. Falls back to CPU when CUDA is not available.
    batch_size : int
        Number of images to process per batch for YOLO inference.

    Returns
    -------
    labels_list : list of numpy arrays
        One int32 array of shape (N,) per point cloud holding the predicted
        class id per point, or -1 for points not covered by any mask. This
        includes frames with no detections at all.
    """
    device = torch.device(device if torch.cuda.is_available() else 'cpu')
    total = len(pcds)
    labels_list = [None] * total

    with tqdm(total=total, desc="Processing Point Clouds") as pbar:
        for start_idx in range(0, total, batch_size):
            end_idx = min(start_idx + batch_size, total)
            batch_full_images_sub = full_images[start_idx:end_idx]

            # YOLO inference on the batch of full images
            results = model.predict(source=batch_full_images_sub, device=device, verbose=False)

            for i, (pcd, _rgb, full_color_image, intrinsic_mat, extrinsic_mat) in enumerate(
                zip(
                    pcds[start_idx:end_idx],
                    rgbs[start_idx:end_idx],
                    batch_full_images_sub,
                    intrinsics[start_idx:end_idx],
                    extrinsics[start_idx:end_idx],
                )
            ):
                labels_list[start_idx + i] = _labels_from_masks(
                    results[i],
                    pcd,
                    full_color_image.shape[:2],
                    intrinsic_mat,
                    extrinsic_mat,
                    threshold,
                    device,
                )
                pbar.update(1)

    return labels_list


# --------------------------------------------------
# Example Data Loading and Processing
# --------------------------------------------------
# The 'all_data' structure should be something like:
# all_data = {
#     record_key: {
#         'color_files': [...], # list of filenames
#         'depth_files': [...], # list of filenames
#         'point_cloud_files': [...] # corresponding ply filenames
#     },
#     ...
# }

path = Path("/home/capre/disk_4/yutao/leo_data/data_2nd")
resources_path = path / 'resources'

# Parallel lists, one entry per frame. Everything is kept in memory until the
# segmentation call below has finished.
batch_pcds = []
batch_rgbs = []
batch_full_colors = []
batch_intrinsics = []
batch_extrinsics = []
batch_info = []  # (each_record, idx)

record_keys = list(all_data.keys())
for each_record in record_keys:
    record = all_data[each_record]
    for idx, color_name in enumerate(record['color_files']):
        depth_path = resources_path / record['depth_files'][idx]
        color_path = resources_path / color_name

        # Load depth
        depth = np.load(str(depth_path))
        # Load full color image
        full_color = np.asarray(o3d.io.read_image(str(color_path)))  # HxWx3, uint8

        # A 3-channel depth array is decoded as channel0 + 256*channel1 + 65536*channel2
        if depth.ndim == 3 and depth.shape[-1] == 3:
            if np.issubdtype(depth.dtype, np.integer):
                # Widen first: with narrow integer dtypes the multiplications
                # depend on NumPy's promotion rules and can overflow or raise.
                depth = depth.astype(np.int64)
            depth = depth[:, :, 0] + depth[:, :, 1] * 256 + depth[:, :, 2] * 65536

        height, width = depth.shape
        fx, fy = intrinsic[0, 0], intrinsic[1, 1]
        cx, cy = intrinsic[0, 2], intrinsic[1, 2]

        # Pinhole back-projection of every pixel (depth used in its stored units)
        x, y = np.meshgrid(np.arange(width), np.arange(height))
        X = (x - cx) * depth / fx
        Y = (y - cy) * depth / fy
        Z = depth

        valid_mask = Z > 0  # pixels without a depth reading are dropped
        xyz = np.stack((X, Y, Z), axis=-1)[valid_mask]
        rgb = full_color[valid_mask]

        # Store masked xyz and rgb, and full image
        batch_pcds.append(xyz)
        batch_rgbs.append(rgb)
        batch_full_colors.append(full_color)  # unmasked full image for YOLO
        batch_intrinsics.append(intrinsic)
        # Points are already in the camera frame, hence the identity extrinsic
        batch_extrinsics.append(np.eye(4))
        batch_info.append((each_record, idx))

# Run the segmentation, 100 images per YOLO inference batch
labels_list = batch_segment_pcd_from_2d(
    model=model,
    pcds=batch_pcds,
    rgbs=batch_rgbs,
    full_images=batch_full_colors,
    intrinsics=batch_intrinsics,
    extrinsics=batch_extrinsics,
    device='cuda',
    batch_size=100
)

# Save results: one ASCII PLY per frame with position, colour in [0, 1] and segment id
for i, labels in tqdm(enumerate(labels_list), desc='Saving ply', total=len(labels_list)):
    xyz = batch_pcds[i]
    rgb = batch_rgbs[i]
    segment_ids = np.asarray(labels).reshape(-1)
    if not (len(xyz) == len(rgb) == len(segment_ids)):
        raise ValueError(
            f"frame {i}: {len(xyz)} points, {len(rgb)} colours, {len(segment_ids)} labels"
        )

    each_record, idx = batch_info[i]
    ply_name = resources_path / all_data[each_record]['point_cloud_files'][idx]

    # Fill the structured vertex array column by column instead of building one
    # Python tuple per point.
    rgb_unit = rgb / 255.0
    vertex = np.empty(
        len(xyz),
        dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
               ('red', 'f4'), ('green', 'f4'), ('blue', 'f4'), ('segment_id', 'i4')]
    )
    vertex['x'], vertex['y'], vertex['z'] = xyz[:, 0], xyz[:, 1], xyz[:, 2]
    vertex['red'], vertex['green'], vertex['blue'] = rgb_unit[:, 0], rgb_unit[:, 1], rgb_unit[:, 2]
    vertex['segment_id'] = segment_ids

    ply = PlyData([PlyElement.describe(vertex, 'vertex')], text=True)
    ply.write(str(ply_name))


In [None]:
import multiprocessing
from functools import partial
from tqdm import tqdm
from pathlib import Path
from plyfile import PlyData, PlyElement
import numpy as np

def save_ply_file(i, labels, batch_pcds, batch_rgbs, batch_info, resources_path, all_data):
    """
    Write one labelled point cloud to an ASCII PLY file.

    The output has a single 'vertex' element with float32 fields
    x, y, z, red, green, blue (colour divided by 255) and an int32
    'segment_id' field.

    Parameters:
    - i: Index into batch_pcds, batch_rgbs and batch_info.
    - labels: Per-point labels for point cloud i (any shape; flattened).
    - batch_pcds: List of point clouds (Nx3 arrays).
    - batch_rgbs: List of RGB values for the points (Nx3 arrays).
    - batch_info: List of (record, index) pairs for metadata.
    - resources_path: Base path the PLY file name is joined onto.
    - all_data: Dictionary; all_data[record]['point_cloud_files'][index]
      gives the output file name relative to resources_path.

    Raises:
    - ValueError: if the point, colour and label counts differ.
    """
    xyz = np.asarray(batch_pcds[i])
    rgb = np.asarray(batch_rgbs[i])
    segment_ids = np.asarray(labels).reshape(-1)

    # Column-wise assignment would silently broadcast a length-1 array, so
    # fail loudly on mismatched inputs.
    if not (len(xyz) == len(rgb) == len(segment_ids)):
        raise ValueError(
            f"Point cloud {i}: point/colour/label counts differ "
            f"({len(xyz)}, {len(rgb)}, {len(segment_ids)})"
        )

    each_record, idx = batch_info[i]
    ply_name = resources_path / all_data[each_record]['point_cloud_files'][idx]

    # Fill the structured array field by field instead of building one Python
    # tuple per point; this also keeps integer labels from being routed
    # through a float array before landing in the 'i4' field.
    vertex = np.empty(
        len(xyz),
        dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
               ('red', 'f4'), ('green', 'f4'), ('blue', 'f4'), ('segment_id', 'i4')]
    )
    vertex['x'], vertex['y'], vertex['z'] = xyz[:, 0], xyz[:, 1], xyz[:, 2]
    colors = rgb / 255.0
    vertex['red'], vertex['green'], vertex['blue'] = colors[:, 0], colors[:, 1], colors[:, 2]
    vertex['segment_id'] = segment_ids

    ply = PlyData([PlyElement.describe(vertex, 'vertex')], text=True)
    ply.write(str(ply_name))


def save_ply_files_parallel(labels_list, batch_pcds, batch_rgbs, batch_info, resources_path, all_data):
    """
    Save PLY files using multiprocessing.

    Each point cloud is written by a separate save_ply_file call running in a
    worker process. Exceptions raised in a worker are re-raised here.

    Parameters:
    - labels_list: List of labels (one per point cloud).
    - batch_pcds: List of point clouds (Nx3 arrays).
    - batch_rgbs: List of RGB values for the points (Nx3 arrays).
    - batch_info: List of (record, index) pairs for metadata.
    - resources_path: Path to save the PLY files.
    - all_data: Dictionary containing file paths for saving.
    """
    n_tasks = len(labels_list)
    if n_tasks == 0:
        # Nothing to write; avoid spinning up a pool for no work.
        return

    n_workers = max(1, min(multiprocessing.cpu_count(), n_tasks))
    with multiprocessing.Pool(processes=n_workers) as pool:
        pending = []
        for i, labels in enumerate(labels_list):
            record = batch_info[i][0]
            # Two fixes over mapping a partial across enumerate(labels_list):
            #  * the (i, labels) pair is passed as two positional arguments;
            #    imap handed the whole tuple over as `i` and left `labels`
            #    missing, which raised TypeError in every worker;
            #  * only this item's arrays and metadata are sent, as one-element
            #    containers read at index 0, instead of pickling the entire
            #    batch along with every single task.
            task_args = (
                0,
                labels,
                [batch_pcds[i]],
                [batch_rgbs[i]],
                [batch_info[i]],
                resources_path,
                {record: all_data[record]},
            )
            pending.append(pool.apply_async(save_ply_file, task_args))

        # .get() blocks until the task finishes and re-raises worker errors.
        for result in tqdm(pending, desc="Saving PLY files", total=n_tasks):
            result.get()


# ---------------------------------
# Example Usage
# ---------------------------------
# Requires these names to already be defined by earlier cells:
# - labels_list     (one label array per point cloud)
# - batch_pcds      (list of Nx3 point arrays)
# - batch_rgbs      (list of Nx3 colour arrays)
# - batch_info      (list of (record, index) pairs)
# - resources_path  (base output path)
# - all_data        (record -> dict with a 'point_cloud_files' list)
#
# NOTE(review): this writes to the same
# resources_path / all_data[record]['point_cloud_files'][index] paths as the
# sequential "Save results" loop earlier in the notebook, so running both
# cells writes every file twice. Presumably only one is needed; confirm.

save_ply_files_parallel(labels_list, batch_pcds, batch_rgbs, batch_info, resources_path, all_data)
