In [22]:
import open3d as o3d
import torch, numpy as np, pprint
import os

# torch.set_printoptions(threshold=torch.inf)

# 加载序列化的 .pt 文件
pt_path = '/data/processed/real3dad/train/diamond_491_bulge_cut.pt'
#pt_path = '/data/processed/real3dad/val/starfish_506_bulges_cut.pt'
#pt_path = '/data/processed/real3dad/train/diamond_470_bulge_cut.pt'

# 1) 基于 pt_path 推导 pcd_path / gt_path
stem = os.path.basename(pt_path)[:-3]  # 去掉 .pt
split = os.path.basename(os.path.dirname(pt_path))  # train / val ...
category, name = stem.split("_", 1)
pcd_path_auto = f"/data/Real3D-AD-PCD/{category}/test_neo/{name}.pcd"
gt_path_auto  = f"/data/processed/real3dad/instance_gt/{split}/{stem}.txt"

print(f"\n[路径推导]\n  pcd_path -> {pcd_path_auto}\n  gt_path  -> {gt_path_auto}")
pcd_path = pcd_path_auto
gt_path = gt_path_auto


# 让 numpy / torch 打印不省略
np.set_printoptions(threshold=np.inf, linewidth=200, suppress=True)
try:
    torch.set_printoptions(profile="full")  # PyTorch 全量打印
except TypeError:
    torch.set_printoptions(threshold=float('inf'), linewidth=200)

def to_numpy(x):
    if isinstance(x, np.ndarray):
        return x
    if torch.is_tensor(x):
        return x.detach().cpu().numpy()
    return np.asarray(x)

def print_full(name, arr, as_int=False):
    a = to_numpy(arr)
    if as_int:
        a = a.astype(int)
    print(f"\n[{name}] shape={a.shape}, dtype={a.dtype}")
    # 用 array2string 强制完整展开
    print(np.array2string(a, max_line_width=200, separator=', '))

# 载入 .pt
data = torch.load(pt_path, map_location='cpu')

# 元信息
print("\n[meta]")
pprint.pprint(data['meta'], width=120)

# 全量打印各字段
#print_full('points', data['points'])                       # (N,3)
#print_full('semantic_labels_pp', data['semantic_labels_pp'])   # (N,)
#print_full('instance_labels_pp', data['instance_labels_pp'])   # (N,)
#print_full('instance_masks', data['instance_masks'])           # (K,N) 布尔
print_full('semantic_labels_inst', data['semantic_labels_inst'])  # (K,)


# 2) 读取“一列整型 GT”（长度=N）
try:
    gt = np.loadtxt(gt_path, dtype=int)
    if gt.ndim == 2 and gt.shape[1] == 1:
        gt = gt.ravel()
except Exception as e:
    raise RuntimeError(f"读取 GT 失败: {gt_path}") from e

N = data["points"].shape[0]
if gt.shape[0] != N:
    raise ValueError(f"GT 长度({gt.shape[0]}) != 点数 N({N})")

sem_pp = to_numpy(data["semantic_labels_pp"]).astype(int).ravel()
inst_pp = to_numpy(data["instance_labels_pp"]).astype(int).ravel()

print(f"[基本信息] N={N}, GT唯一实例ID(>0)={np.unique(gt[gt>0]).tolist()}")

# 3) 背景一致性（只看“是否为实例点”这一层面）
gt_fg = gt > 0
pt_fg = (inst_pp > 0) & (inst_pp != 255)  # 兼容 255 作为忽略/背景
fg_mismatch = np.where(gt_fg ^ pt_fg)[0]
if fg_mismatch.size == 0:
    print("[OK] GT 与 PT 的前景/背景划分一致")
else:
    print(f"[WARN] 前景/背景不一致点数: {fg_mismatch.size}（举例索引前10）: {fg_mismatch[:10].tolist()}")

# 4) 每个 GT 实例内的 PT 实例ID 是否“一致”（不拆不并）
bad_inst = []
for gid in np.unique(gt[gt > 0]):
    idx = (gt == gid)
    vals = inst_pp[idx]
    uniq = np.unique(vals)
    # 允许 PT 用同一个正ID 标全部点；不接受多ID/含0/255
    if not (uniq.size == 1 and uniq[0] > 0 and uniq[0] != 255):
        bad_inst.append(int(gid))
if not bad_inst:
    print("[OK] 每个 GT 实例在 PT 的 instance_labels_pp 中保持成块（一致的正实例ID）")
else:
    print(f"[WARN] 下列 GT 实例在 PT 中被拆分/含背景ID: {bad_inst}")

# 5) 每个 GT 实例内语义是否“基本纯净”（可选，阈值很严）
bad_sem = []
for gid in np.unique(gt[gt > 0]):
    idx = (gt == gid)
    vals, cnts = np.unique(sem_pp[idx], return_counts=True)
    major_ratio = cnts.max() / idx.sum()
    if major_ratio < 0.9999:
        bad_sem.append((int(gid), float(major_ratio)))
if not bad_sem:
    print("[OK] 每个 GT 实例内的 semantic_labels_pp 近乎单一")
else:
    print(f"[WARN] 存在语义不纯实例（主语义占比<0.9999）: {bad_sem[:10]}")

pcd = o3d.io.read_point_cloud(pcd_path)
o3d.visualization.draw_plotly([pcd], width=1000, height=600)


[路径推导]
  pcd_path -> /data/Real3D-AD-PCD/diamond/test_neo/491_bulge_cut.pcd
  gt_path  -> /data/processed/real3dad/instance_gt/train/diamond_491_bulge_cut.txt

[meta]
{'category': 'good', 'category_id': 1, 'file': '/data/Real3D-AD-PCD/diamond/test_neo/491_bulge_cut.pcd'}

[semantic_labels_inst] shape=(1,), dtype=int32
[1]
[基本信息] N=8192, GT唯一实例ID(>0)=[1001]
[OK] GT 与 PT 的前景/背景划分一致
[OK] 每个 GT 实例在 PT 的 instance_labels_pp 中保持成块（一致的正实例ID）
[OK] 每个 GT 实例内的 semantic_labels_pp 近乎单一


In [4]:
# GT一致性检查
import numpy as np, torch
from pathlib import Path

root = Path("/data/processed/real3dad")
scene = "starfish_506_bulges_cut"

pt = torch.load(root/"val"/f"{scene}.pt", map_location="cpu")
N_pt = pt["points"].shape[0]

gt = np.loadtxt(root/"instance_gt"/"val"/f"{scene}.txt", dtype=np.int64)
print("N_pt =", N_pt, " N_gt =", gt.shape[0], " equal? ", N_pt == gt.shape[0])
print("unique semantic_id in GT:", np.unique(gt//1000))
print("unique inst_index in GT:", np.unique(gt%1000))

N_pt = 8192  N_gt = 8192  equal?  True
unique semantic_id in GT: [0 1]
unique inst_index in GT: [0 1 2]


In [5]:
import torch, glob, numpy as np

folders = ['train', 'val', 'test']
base = '/data/processed/real3dad'
all_files = []
for folder in folders:
    all_files.extend(glob.glob(f"{base}/{folder}/*.pt"))

label_counts = {}
bad = []
empty = 0
point_spans = {} 
instance_counts = {}

for fp in all_files:
    data = torch.load(fp, map_location='cpu')
    semantic_pp = np.asarray(data['semantic_labels_pp'])
    if len(semantic_pp) == 0:
        empty += 1
        continue
    if semantic_pp.min() < 0 or semantic_pp.max() > 1:
        bad.append(fp)
    binc = np.bincount(semantic_pp)
    counts = {label: count for label, count in enumerate(binc) if count > 0}
    if counts:
        label_counts[fp] = counts
    # 新增：统计xyz跨度
    pts = np.asarray(data['points'])  # (N,3)
    span = pts.max(axis=0) - pts.min(axis=0)  # (3,)
    point_spans[fp] = span
    # 实例数量
    semantic_labels_inst = np.asarray(data['semantic_labels_inst'])
    instance_counts[fp] = len(semantic_labels_inst)

print("empty samples:", empty)
print("label out of {0,1} files:", len(bad))

# 收集所有出现过的label，按255优先、其余降序
all_labels = set()
for counts in label_counts.values():
    all_labels.update([k for k in counts])
if 255 in all_labels:
    sorted_labels = [255] + sorted([l for l in all_labels if l != 255], reverse=True)
else:
    sorted_labels = sorted(all_labels, reverse=True)

# 文件排序，按255标签数量倒序（无255的文件视为0）
sorted_items = sorted(
    label_counts.items(),
    key=lambda x: x[1].get(255, 0),
    reverse=True
)

# 打印表头
header = "filename".ljust(60) + "".join([f"{l:>8}" for l in sorted_labels]) + "   n_inst   span_x   span_y   span_z"
print(header)
print("=" * len(header))

# 全量打印
for fp, counts in sorted_items:
    row = fp.ljust(60)
    for l in sorted_labels:
        row += f"{counts.get(l, 0):>8}"
    n_inst = instance_counts[fp]
    span = point_spans[fp]  # (3,)
    row += f"{n_inst:8d}{span[0]:9.3f}{span[1]:9.3f}{span[2]:9.3f}"
    print(row)

# 总计输出
total = {l: 0 for l in sorted_labels}
for counts in label_counts.values():
    for label, count in counts.items():
        if label in total:
            total[label] += count
print("\nTotal count for all labels across all files:")
for l in sorted_labels:
    print(f"label {l:>3}: {total[l]}")


empty samples: 0
label out of {0,1} files: 164
filename                                                           2       1       0   n_inst   span_x   span_y   span_z
/data/processed/real3dad/train/fish_336_sink_cut.pt              144       0    8048       1   27.480   25.022   10.220
/data/processed/real3dad/train/duck_586_bulge_cut.pt               0     347    7845       1   29.264   29.326   14.163
/data/processed/real3dad/train/starfish_597_sink_cut.pt          208       0    7984       1   26.362   26.098   11.426
/data/processed/real3dad/train/gemstone_300_bulge.pt               0     582    7610       1   21.689   17.324    8.780
/data/processed/real3dad/train/toffees_462_bulge_cut.pt            0     405    7787       1   32.060   34.314   13.201
/data/processed/real3dad/train/toffees_448_bulge.pt                0     378    7814       1   32.016   34.393   13.369
/data/processed/real3dad/train/diamond_525_sink.pt               558       0    7634       1   19.003   17.274  