In [None]:
#!/usr/bin/env python3
"""
Point Cloud ⇢ Wireframe Face Labeller (faces-from-lines with coplanar merge)

Goal: avoid the common over-segmentation where one intended face is recognized
as two (or more) due to diagonals/chords in the wireframe. This script:

1) Parses OBJ (v + l).
2) Detects simple cycles (3+ edges) that are approximately coplanar.
3) Groups cycles by plane (normal & offset tolerance).
4) **Merges coplanar, overlapping cycles into single faces** (prefers Shapely
   polygon union; if Shapely isn't available, falls back to a heuristic that
   keeps only the largest-area loop in each plane group).
5) Assigns each XYZ point to the merged faces and writes XYZ-only + FACE_ID.

Output format: each row → X Y Z FACE_ID (−1 if no match).

Dependencies:
- numpy, scipy (KDTree). Optional: shapely (strongly recommended for robust
  merge of coplanar loops with holes).

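Usage
-----
NOTE: no command-line parser is wired up in the code shown here; the flags
below name the corresponding parameters, which are set through the
module-level constants (PLANE_TOL, ANG_TOL_DEG, ...) before test_main() runs.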
python pointcloud_label_by_wireframe_merged.py \
  --xyz input.xyz \
  --obj frame.obj \
  --out labelled.xyz \
  --plane_tol 0.02 \
  --ang_tol_deg 2.0 \
  --plane_d_tol 0.05 \
  --max_distance 2.0 \
  --cycle_len_max 32 \
  --chunk_size 200000

Tolerances (CRS units, e.g., meters):
* plane_tol: RMS distance of loop vertices to its best-fit plane.
* ang_tol_deg: angle threshold for plane normal similarity when grouping.
* plane_d_tol: allowed plane offset difference when grouping faces by plane.
* max_distance: max perpendicular distance of a point from plane to be inside.
"""
from __future__ import annotations
import argparse, math, sys
from pathlib import Path
from typing import List, Tuple, Dict, Iterable, Optional
import numpy as np
from scipy.spatial import cKDTree

try:
    from shapely.geometry import Polygon
    from shapely.ops import unary_union
    _HAVE_SHAPELY = True
except Exception:
    _HAVE_SHAPELY = False

# `display` is only defined inside IPython/Jupyter sessions; `print` reports
# the same information there and also works when run as a plain script.
print(f"Shapely available: {_HAVE_SHAPELY}")

# ---------------------------- OBJ parsing ---------------------------------- #

def parse_obj(path: Path):
    """Read vertices and polyline edges from a Wavefront OBJ file.

    Only ``v`` (vertex) and ``l`` (polyline) records are interpreted; blank
    lines, ``#`` comments and every other record type are skipped.

    Args:
        path: Location of the OBJ file.

    Returns:
        A ``(verts, edges)`` tuple. ``verts`` is a float array built from the
        XYZ triples of the ``v`` records, in file order. ``edges`` is a sorted
        list of unique ``(a, b)`` zero-based vertex index pairs with ``a < b``,
        one per consecutive vertex pair of every ``l`` record.
    """
    verts, edges = [], set()
    with path.open("r", encoding="utf-8", errors="ignore") as f:
        for raw in f:
            s = raw.strip()
            if not s or s.startswith("#"):
                continue
            if s.startswith("v "):
                # Extra columns after XYZ (e.g. w or colours) are ignored.
                _, xs, ys, zs, *_extra = s.split()
                verts.append([float(xs), float(ys), float(zs)])
            elif s.startswith("l "):
                vids = []
                for token in s.split()[1:]:
                    # "v/vt" references: only the vertex part matters here.
                    idx = int(token.split("/")[0])
                    if idx < 0:
                        # OBJ relative index: -1 is the most recently defined
                        # vertex at this point in the file.
                        vids.append(len(verts) + idx)
                    else:
                        # OBJ absolute indices are 1-based.
                        vids.append(idx - 1)
                for a, b in zip(vids, vids[1:]):
                    if a != b:  # drop zero-length segments
                        edges.add((min(a, b), max(a, b)))
    return np.asarray(verts, float), sorted(edges)

# ----------------------------- Geometry ------------------------------------ #

def fit_plane(points: np.ndarray):
    """Fit a least-squares plane to a set of points via SVD.

    Args:
        points: Array with one point per row.

    Returns:
        ``(centroid, normal, rms)``: the mean of the points, the unit
        right-singular vector belonging to the smallest singular value of the
        centred points, and the root-mean-square of the points' signed
        distances along that normal (as a Python float).
    """
    centroid = points.mean(0)
    centered = points - centroid
    right_vectors = np.linalg.svd(centered, full_matrices=False)[2]
    normal = right_vectors[-1]
    # The epsilon guards against a zero-length vector.
    normal = normal / (np.linalg.norm(normal) + 1e-12)
    offsets = centered @ normal
    rms = float(np.sqrt(np.mean(offsets ** 2)))
    return centroid, normal, rms

def plane_basis(n: np.ndarray):
    """Build two unit vectors spanning the plane perpendicular to ``n``.

    Args:
        n: Plane normal (normalised internally).

    Returns:
        ``(u, v)`` with ``u = normalise(n × a)`` and ``v = normalise(n × u)``,
        where the helper axis ``a`` is X unless ``n`` is nearly parallel to X,
        in which case Y is used.
    """
    unit_n = n / (np.linalg.norm(n) + 1e-12)
    helper = np.array([1.0, 0.0, 0.0])
    nearly_parallel = abs(np.dot(helper, unit_n)) > 0.9
    if nearly_parallel:
        # Crossing with an almost-parallel axis would be ill-conditioned.
        helper = np.array([0.0, 1.0, 0.0])
    first = np.cross(unit_n, helper)
    first = first / (np.linalg.norm(first) + 1e-12)
    second = np.cross(unit_n, first)
    second = second / (np.linalg.norm(second) + 1e-12)
    return first, second

def project_points(P: np.ndarray, origin: np.ndarray, n: np.ndarray) -> np.ndarray:
    """Express points in the 2D frame of a plane.

    Args:
        P: Points to project, one per row.
        origin: Point used as the origin of the 2D frame.
        n: Plane normal; the in-plane axes come from ``plane_basis(n)``.

    Returns:
        Array with two columns holding each point's coordinates along the
        two basis vectors.
    """
    axis_u, axis_v = plane_basis(n)
    relative = P - origin
    along_u = relative @ axis_u
    along_v = relative @ axis_v
    return np.stack([along_u, along_v], axis=1)

# ----------------------------- Cycles -------------------------------------- #

def build_adj(nv: int, lines: List[Tuple[int,int]]):
    """Build a sorted adjacency list from undirected edges.

    Args:
        nv: Number of vertices (length of the returned list).
        lines: ``(a, b)`` vertex index pairs.

    Returns:
        A list of ``nv`` lists; entry ``i`` holds the neighbours of vertex
        ``i`` in ascending order.
    """
    neighbours = [[] for _ in range(nv)]
    for first, second in lines:
        neighbours[first].append(second)
        neighbours[second].append(first)
    return [sorted(row) for row in neighbours]

def find_simple_cycles(adj, maxlen=32):
    """Enumerate the simple cycles of an undirected graph, once each.

    A depth-first search is started from every vertex in ascending order.
    Vertices that have already served as a start are blocked, so each cycle
    is discovered from its smallest vertex only.

    Args:
        adj: Adjacency list; ``adj[i]`` lists the neighbours of vertex ``i``.
        maxlen: Maximum number of vertices allowed in a reported cycle.

    Returns:
        A list of cycles, each a list of vertex indices beginning with the
        cycle's smallest vertex. Both traversal directions of the same loop
        map to one entry.
    """
    cycles, seen = [], set()

    def canonical(path):
        # Rotate so the smallest vertex comes first, then pick the
        # lexicographically smaller of the two traversal directions. The
        # reversed direction must ALSO start at the smallest vertex,
        # otherwise the forward form always wins and every loop is reported
        # twice (once per direction).
        pivot = path.index(min(path))
        rot = path[pivot:] + path[:pivot]
        forward = tuple(rot)
        backward = (rot[0],) + tuple(reversed(rot[1:]))
        return forward if forward < backward else backward

    def dfs(start, u, parent, stack, blocked):
        if len(stack) > maxlen:
            return
        for v in adj[u]:
            if v == parent:
                # Do not walk straight back along the edge we arrived on.
                continue
            if v == start and len(stack) >= 3:
                key = canonical(stack)
                if key not in seen:
                    seen.add(key)
                    cycles.append(list(key))
                continue
            if v in blocked or v in stack:
                continue
            stack.append(v)
            dfs(start, v, u, stack, blocked)
            stack.pop()

    blocked = set()
    for s in range(len(adj)):
        if not adj[s]:
            continue
        dfs(s, s, -1, [s], blocked)
        # Every cycle through s has now been found; exclude it from later
        # searches.
        blocked.add(s)
    return cycles

# -------------- Build faces from lines & merge coplanar -------------------- #

def derive_cycles_as_faces(V, lines, plane_tol: float, cycle_len_max: int):
    """Turn approximately planar wireframe cycles into provisional faces.

    Args:
        V: Vertex array indexed by the ids used in ``lines``.
        lines: Undirected edges as vertex index pairs.
        plane_tol: Maximum RMS plane-fit residual for a cycle to be kept.
        cycle_len_max: Forwarded to ``find_simple_cycles`` as its length cap.

    Returns:
        A list of dicts with keys ``vertex_ids``, ``origin``, ``normal`` and
        ``poly2d`` (the cycle projected into its own fitted plane).
    """
    adjacency = build_adj(len(V), lines)
    provisional = []
    for loop in find_simple_cycles(adjacency, cycle_len_max):
        corners = V[np.array(loop)]
        centre, nrm, residual = fit_plane(corners)
        if residual > plane_tol:
            continue  # not flat enough to be a face
        ring = project_points(corners, centre, nrm)
        xs, ys = ring[:, 0], ring[:, 1]
        # Shoelace formula; a (near-)zero area marks a degenerate loop.
        signed_area = 0.5 * float(np.dot(xs, np.roll(ys, -1)) - np.dot(ys, np.roll(xs, -1)))
        if abs(signed_area) < 1e-12:
            continue
        provisional.append(dict(vertex_ids=loop, origin=centre, normal=nrm, poly2d=ring))
    return provisional

def group_by_plane(faces: List[Dict], ang_tol_deg: float, plane_d_tol: float):
    """Greedily cluster faces that lie on (approximately) the same plane.

    Each not-yet-assigned face seeds a group; every later unassigned face
    joins it when its normal is within ``ang_tol_deg`` of the seed's normal
    (sign ignored) and its origin lies within ``plane_d_tol`` of the seed's
    plane, measured along the seed normal.

    Args:
        faces: Dicts providing ``normal`` and ``origin``.
        ang_tol_deg: Angular tolerance between normals, in degrees.
        plane_d_tol: Allowed offset along the seed normal.

    Returns:
        A list of groups, each a list of indices into ``faces``.
    """
    min_cos = math.cos(math.radians(ang_tol_deg))
    total = len(faces)
    assigned = [False] * total
    groups: List[List[int]] = []
    for seed, seed_face in enumerate(faces):
        if assigned[seed]:
            continue
        assigned[seed] = True
        seed_normal, seed_origin = seed_face['normal'], seed_face['origin']
        members = [seed]
        for other in range(seed + 1, total):
            if assigned[other]:
                continue
            candidate = faces[other]
            # abs() makes the comparison independent of normal orientation.
            alignment = abs(np.clip(np.dot(seed_normal, candidate['normal']), -1.0, 1.0))
            if alignment < min_cos:
                continue
            offset = abs(np.dot(candidate['origin'] - seed_origin, seed_normal))
            if offset <= plane_d_tol:
                assigned[other] = True
                members.append(other)
        groups.append(members)
    return groups

# --------------------------- Merge with Shapely ---------------------------- #

def merge_group_shapely(V, faces: List[Dict], idxs: List[int]):
    """Union all cycles of one plane group into merged faces using Shapely.

    All vertices are projected into the 2D frame of the group's first face,
    each cycle becomes a polygon in that frame, and the polygons are unioned.

    Args:
        V: Vertex array indexed by the faces' ``vertex_ids``.
        faces: Provisional faces (dicts with ``vertex_ids``, ``origin``,
            ``normal``).
        idxs: Indices into ``faces`` belonging to one plane group; the first
            one supplies the reference origin and normal.

    Returns:
        A list of dicts with keys ``origin``, ``normal``, ``poly2d`` (exterior
        ring coordinates) and ``holes2d`` (list of interior ring coordinates),
        one per polygon in the union. Empty if nothing usable remains.
    """
    ref = faces[idxs[0]]
    origin, normal = ref['origin'], ref['normal']
    U = project_points(V, origin, normal)  # project all vertices once

    polys = []
    for k in idxs:
        ring = U[np.array(faces[k]['vertex_ids'])]
        poly = Polygon(ring)
        if not poly.is_valid:
            # A cycle that is simple in the graph can still cross itself once
            # projected (e.g. a bow-tie through two crossing diagonals).
            # Invalid input can make unary_union fail, so repair it first with
            # the zero-width buffer idiom.
            poly = poly.buffer(0)
        if poly.is_empty:
            continue
        polys.append(poly)
    if not polys:
        return []
    merged = unary_union(polys)

    # The union may be a single Polygon, a MultiPolygon, or a collection;
    # keep every polygonal part and ignore anything else.
    if merged.geom_type == 'Polygon':
        parts = [merged]
    else:
        parts = list(getattr(merged, 'geoms', []))

    out = []
    for p in parts:
        if p.geom_type != 'Polygon' or p.is_empty:
            continue
        exterior = np.asarray(p.exterior.coords)[:, :2]
        holes = [np.asarray(r.coords)[:, :2] for r in p.interiors]
        out.append({'origin': origin, 'normal': normal, 'poly2d': exterior, 'holes2d': holes})
    return out

# ----------------------- Heuristic merge fallback -------------------------- #

def merge_group_heuristic(faces: List[Dict], idxs: List[int]):
    """Fallback merge: keep only the largest-area loop of a plane group.

    Smaller loops in the same group are assumed to be chord-induced pieces
    of the largest one and are dropped.

    Args:
        faces: Provisional faces (dicts with ``origin``, ``normal``,
            ``poly2d``).
        idxs: Indices into ``faces`` belonging to one plane group.

    Returns:
        A list with at most one dict (keys ``origin``, ``normal``, ``poly2d``,
        ``holes2d``); empty when ``idxs`` is empty.
    """
    def unsigned_area(ring):
        # Shoelace formula on the 2D ring.
        xs, ys = ring[:, 0], ring[:, 1]
        return abs(0.5 * float(np.dot(xs, np.roll(ys, -1)) - np.dot(ys, np.roll(xs, -1))))

    scored = [(k, unsigned_area(faces[k]['poly2d'])) for k in idxs]
    if not scored:
        return []
    # A stable sort keeps the earliest index first among equal areas.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    winner = faces[ranked[0][0]]
    return [{
        'origin': winner['origin'],
        'normal': winner['normal'],
        'poly2d': winner['poly2d'],
        'holes2d': [],
    }]

# ---------------------------- Build final faces ---------------------------- #

def build_merged_faces(V, lines, plane_tol, ang_tol_deg, plane_d_tol, cycle_len_max):
    """Derive faces from the wireframe and merge those sharing a plane.

    Args:
        V: Vertex array.
        lines: Undirected edges as vertex index pairs.
        plane_tol: RMS planarity tolerance for accepting a cycle.
        ang_tol_deg: Normal-angle tolerance used when grouping by plane.
        plane_d_tol: Plane-offset tolerance used when grouping by plane.
        cycle_len_max: Maximum cycle length considered.

    Returns:
        The merged faces of all plane groups, concatenated in group order;
        an empty list when no provisional face was found.
    """
    provisional = derive_cycles_as_faces(V, lines, plane_tol, cycle_len_max)
    if not provisional:
        return []
    result = []
    for members in group_by_plane(provisional, ang_tol_deg, plane_d_tol):
        # Shapely gives a true polygon union; otherwise use the heuristic.
        if not _HAVE_SHAPELY:
            result.extend(merge_group_heuristic(provisional, members))
            continue
        result.extend(merge_group_shapely(V, provisional, members))
    return result

# ---------------------------- Point membership ----------------------------- #

def point_in_face(p: np.ndarray, face: Dict, maxdist: float) -> bool:
    """Decide whether a 3D point belongs to a merged face.

    The point must lie within ``maxdist`` of the face plane (the distance
    check is skipped when ``maxdist`` is not positive), and its projection
    must fall inside the face's outer ring but outside every hole.

    Args:
        p: The 3D point.
        face: Dict with ``origin``, ``normal``, ``poly2d`` and optionally
            ``holes2d``.
        maxdist: Maximum allowed distance from the plane along the normal.

    Returns:
        ``True`` if the point is accepted for this face, else ``False``.
    """
    origin, normal = face['origin'], face['normal']
    plane_offset = float(np.dot(p - origin, normal))
    if maxdist > 0 and abs(plane_offset) > maxdist:
        return False

    # 2D coordinates of the point in the face's plane frame.
    axis_u, axis_v = plane_basis(normal)
    k2 = np.array([np.dot(p - origin, axis_u), np.dot(p - origin, axis_v)])
    px, py = k2[0], k2[1]

    def inside_ring(ring):
        # Even-odd rule: toggle on every edge crossed by a ray cast from the
        # point in the +x direction.
        count = len(ring)
        inside = False
        for i in range(count):
            xi, yi = ring[i, 0], ring[i, 1]
            xj, yj = ring[(i + 1) % count, 0], ring[(i + 1) % count, 1]
            straddles = (yi > py) != (yj > py)
            if straddles and (px < (xj - xi) * (py - yi) / ((yj - yi) + 1e-12) + xi):
                inside = not inside
        return inside

    if not inside_ring(face['poly2d']):
        return False
    return not any(inside_ring(hole) for hole in face.get('holes2d', []))

# ------------------------------ IO helpers -------------------------------- #

def stream_xyz(path: Path, chunk_size: int):
    """Yield the XYZ coordinates of a text point cloud in fixed-size chunks.

    Each usable line contributes its first three whitespace-separated
    columns. Blank lines, lines with fewer than three columns and lines
    whose leading columns are not numeric are skipped.

    Args:
        path: Text file with one point per line.
        chunk_size: Number of points per yielded array (the last chunk may be
            shorter).

    Yields:
        Float arrays with one XYZ row per point.
    """
    pending = []
    with path.open('r', encoding='utf-8', errors='ignore') as f:
        for line in f:
            tokens = line.split()
            if len(tokens) < 3:
                continue
            try:
                xyz = [float(tok) for tok in tokens[:3]]
            except Exception:
                # Header or otherwise non-numeric line: ignore it.
                continue
            pending.append(xyz)
            if len(pending) >= chunk_size:
                yield np.asarray(pending, float)
                pending = []
    if pending:
        yield np.asarray(pending, float)

def write_xyz(out_fh, chunk: np.ndarray, ids: np.ndarray):
    """Append labelled points to an open text handle.

    Args:
        out_fh: Object with a ``write`` method.
        chunk: Points, three coordinates per row.
        ids: One face id per row of ``chunk``; written as an integer.
    """
    for point, label in zip(chunk, ids):
        x, y, z = point
        out_fh.write(f"{x} {y} {z} {int(label)}\n")


# ------------------------------ Label runners ----------------------------- #

def label_one(xyz_path: Path, obj_path: Path, out_path: Path,
              plane_tol: float, ang_tol_deg: float, plane_d_tol: float,
              max_distance: float, cycle_len_max: int, chunk_size: int):
    """Label one point cloud with face ids derived from one wireframe.

    Args:
        xyz_path: Input point cloud (text, XYZ in the first three columns).
        obj_path: Wireframe OBJ with ``v`` and ``l`` records.
        out_path: Output file; its parent directory is created if needed.
        plane_tol: RMS planarity tolerance for accepting a cycle as a face.
        ang_tol_deg: Normal-angle tolerance for grouping faces by plane.
        plane_d_tol: Plane-offset tolerance for grouping faces by plane.
        max_distance: Maximum point-to-plane distance for membership.
        cycle_len_max: Maximum cycle length searched in the wireframe.
        chunk_size: Number of points processed per chunk.

    Returns:
        ``True`` when the output was written; ``False`` (after printing an
        error to stderr) when the OBJ has no vertices/lines or no face could
        be derived.
    """
    V, lines = parse_obj(obj_path)
    if len(V) == 0 or len(lines) == 0:
        print(f'ERROR: OBJ missing vertices or lines → {obj_path}', file=sys.stderr)
        return False

    faces = build_merged_faces(V, lines, plane_tol, ang_tol_deg, plane_d_tol, cycle_len_max)
    if not faces:
        print(f'ERROR: No valid faces derived after merging → {obj_path}', file=sys.stderr)
        return False

    def centroid_3d(face):
        # Mean of the 2D ring, lifted back into 3D through the plane basis.
        mean_2d = face['poly2d'].mean(axis=0)
        axis_u, axis_v = plane_basis(face['normal'])
        return face['origin'] + mean_2d[0] * axis_u + mean_2d[1] * axis_v

    # KD-tree over face centroids: only the nearest faces are tested per point.
    tree = cKDTree(np.vstack([centroid_3d(face) for face in faces]))
    n_candidates = min(48, len(faces))

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open('w') as out_fh:
        for chunk in stream_xyz(xyz_path, chunk_size):
            _, nearest = tree.query(chunk, k=n_candidates, workers=-1)
            if n_candidates == 1:
                # query() drops the candidate axis when k == 1.
                nearest = nearest.reshape(-1, 1)
            labels = np.full(len(chunk), -1, int)
            for row, point in enumerate(chunk):
                # First candidate (nearest centroid first) that contains the
                # point wins; -1 when none does.
                labels[row] = next(
                    (int(fid) for fid in nearest[row]
                     if point_in_face(point, faces[int(fid)], max_distance)),
                    -1,
                )
            write_xyz(out_fh, chunk, labels)
    return True


def label_batch(root: Path, xyz_dir: str = 'xyzs', obj_dir: str = 'objs', out_dir: str = 'outputs',
                pattern: str = '*.xyz', **kwargs):
    """Label every point cloud under ``root`` that has a matching wireframe.

    For each ``<stem>.xyz`` in ``root/xyz_dir`` the wireframe
    ``root/obj_dir/<stem>.obj`` is looked up and, if present, ``label_one``
    writes ``<out_dir>/<stem>.xyz``.

    Args:
        root: Base directory containing the input sub-directories.
        xyz_dir: Sub-directory of ``root`` holding the point clouds.
        obj_dir: Sub-directory of ``root`` holding the wireframes.
        out_dir: Output directory, as a ``str`` or ``Path``. It is used as
            given (relative paths resolve against the working directory, not
            ``root``) and is created if missing.
        pattern: Glob pattern selecting the point-cloud files.
        **kwargs: Forwarded unchanged to ``label_one``.

    Returns:
        ``0`` on success, ``2`` if an input directory is missing, ``3`` if no
        point-cloud file matched, ``4`` if at least one file failed.
    """
    root = Path(root)
    xdir = root / xyz_dir
    odir = root / obj_dir
    # Coerce to Path: the default is a plain str, which has no mkdir() or "/".
    outd = Path(out_dir)
    if not xdir.exists() or not odir.exists():
        print(f'ERROR: Missing input directories: {xdir} or {odir}', file=sys.stderr)
        return 2
    outd.mkdir(parents=True, exist_ok=True)

    from glob import glob
    xyz_files = sorted(Path(p) for p in glob(str(xdir / pattern)))
    if not xyz_files:
        print(f'No XYZ files found under {xdir} matching {pattern}', file=sys.stderr)
        return 3

    ok = fail = miss = 0
    for xyzp in xyz_files:
        stem = xyzp.stem
        objp = odir / f'{stem}.obj'
        outp = outd / f'{stem}.xyz'
        if not objp.exists():
            print(f'WARN: OBJ not found for {stem}: {objp}')
            miss += 1
            continue
        print(f'→ Labeling {stem} ...')
        if label_one(xyzp, objp, outp, **kwargs):
            ok += 1
        else:
            fail += 1
    print(f'Done. OK: {ok} | Failed: {fail} | Missing OBJ: {miss} | Outputs → {outd}')
    # Missing wireframes are reported but do not count as failures.
    return 0 if fail == 0 else 4



In [None]:
# ------------------------------ Main Test --------------------------------- #
# Manual configuration — replace paths/values as needed
# Input point cloud, wireframe and output path used by test_main() below.
xyz_file = Path("data/Entry-level/train/xyz/2.xyz")
obj_file = Path("data/Entry-level/train/wireframe/2.obj")
out_file = Path("outputs/2.xyz")

# Passed to build_merged_faces() as plane_tol / ang_tol_deg / plane_d_tol.
PLANE_TOL = 0.02
ANG_TOL_DEG = 2.0
PLANE_D_TOL = 0.05
# Passed to point_in_face() as the maximum point-to-plane distance.
MAX_DISTANCE = 1.0
# Passed to build_merged_faces() as cycle_len_max.
CYCLE_LEN_MAX = 32
# Number of points read per chunk by stream_xyz().
CHUNK_SIZE = 200000

def test_main():
    """Label the single configured point cloud and report the face count.

    Reads ``obj_file`` and ``xyz_file``, writes ``X Y Z FACE_ID`` rows to
    ``out_file`` and prints a summary. Errors about an unusable wireframe are
    printed to stderr and the function returns without writing anything.
    """
    V, lines = parse_obj(obj_file)
    if len(V) == 0 or len(lines) == 0:
        print('ERROR: OBJ missing vertices or lines.', file=sys.stderr)
        return

    faces = build_merged_faces(V, lines, PLANE_TOL, ANG_TOL_DEG, PLANE_D_TOL, CYCLE_LEN_MAX)
    if not faces:
        print('ERROR: No valid faces derived from wireframe after merging.', file=sys.stderr)
        return

    # KD tree of face centroids for candidate filtering
    cents = []
    for f in faces:
        c2 = f['poly2d'].mean(axis=0)
        u, v = plane_basis(f['normal'])
        cents.append(f['origin'] + c2[0]*u + c2[1]*v)
    kdt = cKDTree(np.vstack(cents))
    k = min(48, len(faces))

    # Create the output directory first (as label_one does); otherwise opening
    # the file fails when the directory does not exist yet.
    out_file.parent.mkdir(parents=True, exist_ok=True)
    with out_file.open('w') as out_fh:
        for chunk in stream_xyz(xyz_file, CHUNK_SIZE):
            ids = np.full(len(chunk), -1, int)
            _, cand = kdt.query(chunk, k=k, workers=-1)
            if k == 1:
                # query() drops the candidate axis when k == 1.
                cand = cand.reshape(-1, 1)
            for i, p in enumerate(chunk):
                best = -1
                # Candidates come nearest-centroid first; take the first hit.
                for fid in cand[i]:
                    if point_in_face(p, faces[int(fid)], MAX_DISTANCE):
                        best = int(fid)
                        break
                ids[i] = best
            write_xyz(out_fh, chunk, ids)

    if not _HAVE_SHAPELY:
        print('NOTE: Shapely not found; used heuristic merge. Install shapely for robust unions.', file=sys.stderr)

    print(f"Derived {len(faces)} merged faces | Wrote: {out_file}")

# Run the single-file labelling when this cell/script is executed directly.
if __name__ == '__main__':
    test_main()

In [None]:
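"""
Directory layout for batch mode. The input folder names shown are the
label_batch defaults; this cell overrides them with XYZ_DIR / OBJ_DIR below.
<root>
  ├── xyzs/
  │    └── 2.xyz
  └── objs/
       └── 2.obj     ← matched to the .xyz file by its stem
outputs/             ← out_dir is used as given (not joined with root);
  └── 2.xyz            it will be created if missing
"""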
# Batch configuration consumed by test_main() below.
ROOT = Path("data/Entry-level/train")
XYZ_DIR = "xyz"   # subfolder under ROOT with .xyz files
OBJ_DIR = "wireframe"   # subfolder under ROOT with .obj wireframes
OUT_DIR = Path("outputs")  # output directory; label_batch uses this path as given (it is not joined with ROOT)
PATTERN = "*.xyz"

# Forwarded to label_one() through label_batch() as keyword arguments.
PLANE_TOL = 0.02
ANG_TOL_DEG = 2.0
PLANE_D_TOL = 0.05
MAX_DISTANCE = 1.0
CYCLE_LEN_MAX = 32
CHUNK_SIZE = 200000

def test_main():
    """Run batch labelling for ``ROOT`` with the configured tolerances.

    Forwards the module-level settings to ``label_batch`` and prints a
    warning to stderr when it returns a non-zero status code.
    """
    root = ROOT
    settings = {
        'plane_tol': PLANE_TOL,
        'ang_tol_deg': ANG_TOL_DEG,
        'plane_d_tol': PLANE_D_TOL,
        'max_distance': MAX_DISTANCE,
        'cycle_len_max': CYCLE_LEN_MAX,
        'chunk_size': CHUNK_SIZE,
    }
    print(f"→ Labeling batch under: {root}")
    status = label_batch(
        root,
        xyz_dir=XYZ_DIR,
        obj_dir=OBJ_DIR,
        out_dir=OUT_DIR,
        pattern=PATTERN,
        **settings,
    )
    if status != 0:
        print(f"Warning: label_batch returned {status} for {root}", file=sys.stderr)
    print("All done.")

# Run the batch labelling when this cell/script is executed directly.
if __name__ == '__main__':
    test_main()