# AP2.1 – Preprocessing Exploration

Exploration eines CAD-Modells durch die Preprocessing-Pipeline.  
Vorher/Nachher-Vergleich für jeden Step.

**Verwendung:** STEP-Pfad in Zelle 2 anpassen, dann alle Zellen ausführen.

In [1]:
import sys
from pathlib import Path

import numpy as np
import open3d as o3d
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd

# The repository root is taken to be the parent of the current working
# directory; its `src` folder is prepended to sys.path once, so re-running
# this cell does not add duplicate entries.
project_root = Path().resolve().parent
src_dir = str(project_root / 'src')
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

from schweiss_ki.preprocessing import (
    PreprocessingPipeline,
    StatisticalOutlierFilter,
    RadiusOutlierFilter,
    VoxelGridDownsampler,
    NormalEstimator,
)
from schweiss_ki.core.data_structures import WeldVolumeModel

# Only reached when every import statement above in this cell succeeded.
print('Imports OK')

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
Imports OK


## 1 – Konfiguration

In [2]:
# Notebook configuration. Both paths are relative to the working directory.
STEP_FILE      = Path('../data/raw/step_files/DVS-Demo.STEP')
CONFIG         = Path('../configs/pipeline.yaml')
VIZ_MAX_POINTS = 50_000  # point budget used when subsampling clouds for plots

# Report whether the configured input file is present before continuing.
step_exists = STEP_FILE.exists()
print(f'STEP:      {STEP_FILE}')
print(f'Existiert: {step_exists}')

STEP:      ../data/raw/step_files/DVS-Demo.STEP
Existiert: True


## 2 – CAD-Konvertierung

In [3]:
from client.core import CADConverterClient
import time

client = CADConverterClient()

# The raw point cloud is written to a per-model folder named after the STEP file.
tmp_ply = Path('../data/processed/notebook_exploration') / STEP_FILE.stem / 'pointcloud_raw.ply'
tmp_ply.parent.mkdir(parents=True, exist_ok=True)

print('Konvertiere STEP → PLY ...')
# perf_counter() is a monotonic clock meant for measuring intervals; time.time()
# can jump when the system clock is adjusted.
t = time.perf_counter()
ply_path = client.convert_to_ply(str(STEP_FILE), str(tmp_ply))
print(f'Fertig in {time.perf_counter() - t:.2f}s')

pcd_raw = o3d.io.read_point_cloud(str(ply_path))
# Open3D signals a failed read by returning an empty cloud instead of raising;
# stop here so later cells do not silently operate on zero points.
if len(pcd_raw.points) == 0:
    raise RuntimeError(f'Keine Punkte aus {ply_path} gelesen – Konvertierung fehlgeschlagen?')

bbox = pcd_raw.get_axis_aligned_bounding_box()
ext  = bbox.get_extent()
print(f'Punkte:       {len(pcd_raw.points):,}')
# NOTE(review): the "mm" label assumes the converter emits millimetres — confirm.
print(f'Bounding Box: {ext[0]:.1f} × {ext[1]:.1f} × {ext[2]:.1f} mm')

Konvertiere STEP → PLY ...
Fertig in 0.16s
Punkte:       8,192
Bounding Box: 320.0 × 70.0 × 120.0 mm


## 3 – Hilfsfunktionen

In [4]:
def subsample(pcd, max_points=None):
    """Return the point coordinates of ``pcd``, randomly thinned for plotting.

    Args:
        pcd: Object whose ``points`` attribute can be converted with
            ``np.asarray`` (e.g. an Open3D point cloud).
        max_points: Maximum number of points to return. ``None`` (default)
            uses the current value of the notebook constant ``VIZ_MAX_POINTS``.

    Returns:
        The full point array when it holds at most ``max_points`` rows,
        otherwise ``max_points`` rows drawn without replacement using a fixed
        seed (42), so repeated calls select the same rows.
    """
    # Resolve the default at call time: a default written into the signature
    # is frozen when this cell runs and would ignore later edits to
    # VIZ_MAX_POINTS in the configuration cell.
    if max_points is None:
        max_points = VIZ_MAX_POINTS
    pts = np.asarray(pcd.points)
    if len(pts) > max_points:
        idx = np.random.default_rng(42).choice(len(pts), max_points, replace=False)
        pts = pts[idx]
    return pts


def plot_single(pcd, title, color='steelblue'):
    """Show one point cloud as an interactive 3D scatter plot.

    Args:
        pcd: Point cloud exposing a ``points`` sequence.
        title: Text placed in front of the point count in the figure title.
        color: Marker colour passed to Plotly.

    The plotted markers come from ``subsample(pcd)``, while the legend entry
    and title always state the full size of ``pcd``.
    """
    n_total = len(pcd.points)
    xyz = subsample(pcd)

    scatter = go.Scatter3d(
        x=xyz[:, 0], y=xyz[:, 1], z=xyz[:, 2],
        mode='markers',
        marker={'size': 1.5, 'color': color, 'opacity': 0.7},
        name=f'{n_total:,} Punkte',
    )
    fig = go.Figure(scatter)
    fig.update_layout(
        title_text=f'{title} – {n_total:,} Punkte',
        height=600,
        scene={'aspectmode': 'data'},
        margin={'l': 0, 'r': 0, 't': 40, 'b': 0},
    )
    fig.show()


def plot_before_after(pcd_before, pcd_after, title,
                      color_before='steelblue', color_after='tomato'):
    """Overlay two point clouds in one 3D scatter plot and report the point loss.

    Args:
        pcd_before: Point cloud prior to the processing step.
        pcd_after: Point cloud produced by the processing step.
        title: Main figure title; a subtitle with the number and percentage of
            removed points is appended.
        color_before: Marker colour for the "before" trace.
        color_after: Marker colour for the "after" trace.

    Both clouds are drawn via ``subsample``; the counts in the legend and
    subtitle refer to the full clouds, not to the plotted subsets.
    """
    pts_b = subsample(pcd_before)
    pts_a = subsample(pcd_after)
    n_before = len(pcd_before.points)
    n_after = len(pcd_after.points)
    removed = n_before - n_after
    # An empty input cloud would otherwise raise ZeroDivisionError; nothing can
    # have been removed from it, so report 0 %.
    pct = removed / n_before * 100 if n_before else 0.0

    fig = go.Figure()
    fig.add_trace(go.Scatter3d(
        x=pts_b[:,0], y=pts_b[:,1], z=pts_b[:,2],
        mode='markers', name=f'Vorher ({n_before:,})',
        marker=dict(size=1.5, color=color_before, opacity=0.5),
        visible=True,
    ))
    fig.add_trace(go.Scatter3d(
        x=pts_a[:,0], y=pts_a[:,1], z=pts_a[:,2],
        mode='markers', name=f'Nachher ({n_after:,})',
        marker=dict(size=1.5, color=color_after, opacity=0.7),
        visible=True,
    ))
    fig.update_layout(
        title_text=f'{title}<br><sup>−{removed:,} Punkte ({pct:.1f}% entfernt)</sup>',
        height=600, scene=dict(aspectmode='data'),
        margin=dict(l=0, r=0, t=60, b=0),
        # Pin the legend to the upper-left corner of the figure.
        legend=dict(x=0.01, y=0.99),
    )
    fig.show()


# Confirms that the helper definitions in this cell were executed.
print('Hilfsfunktionen geladen.')

Hilfsfunktionen geladen.


## 4 – Rohe CAD-Punktwolke

In [5]:
# Show the unprocessed cloud, titled with the STEP file's stem.
plot_single(pcd_raw, title=f'Roh: {STEP_FILE.stem}', color='steelblue')

## 5 – Step-für-Step
### 5.1 – Statistical Outlier Filter

In [6]:
# Parameters forwarded to the statistical outlier filter.
NB_NEIGHBORS = 20
STD_RATIO    = 2.0

# apply() hands back the filtered cloud together with a report for this step.
f = StatisticalOutlierFilter(nb_neighbors=NB_NEIGHBORS, std_ratio=STD_RATIO)
pcd_stat, step_report = f.apply(pcd_raw)

# The report exposes retention in percent; the share removed is its complement.
removed_pct = 100 - step_report.retention_pct
print(f'Vorher:   {step_report.points_before:,} Punkte')
print(f'Nachher:  {step_report.points_after:,} Punkte')
print(f'Entfernt: {step_report.points_removed:,} ({removed_pct:.1f}% entfernt)')

stat_title = f'Statistical Outlier Filter  (nb_neighbors={NB_NEIGHBORS}, std_ratio={STD_RATIO})'
plot_before_after(pcd_raw, pcd_stat, stat_title)

Vorher:   8,192 Punkte
Nachher:  8,042 Punkte
Entfernt: 150 (1.8% entfernt)


### 5.2 – Voxel Grid Downsampling

In [7]:
VOXEL_SIZE = 0.5  # mm

# Downsample the outlier-filtered cloud; apply() returns (cloud, step report).
d = VoxelGridDownsampler(voxel_size=VOXEL_SIZE)
pcd_voxel, step_report = d.apply(pcd_stat)

# The report exposes retention in percent; the share removed is its complement.
removed_pct = 100 - step_report.retention_pct
print(f'Vorher:   {step_report.points_before:,} Punkte')
print(f'Nachher:  {step_report.points_after:,} Punkte')
print(f'Entfernt: {step_report.points_removed:,} ({removed_pct:.1f}% entfernt)')

plot_before_after(
    pcd_stat,
    pcd_voxel,
    f'Voxel Grid Downsampling  (voxel_size={VOXEL_SIZE}mm)',
    color_before='steelblue',
    color_after='mediumseagreen',
)

Vorher:   8,042 Punkte
Nachher:  7,978 Punkte
Entfernt: 64 (0.8% entfernt)


### 5.3 – Normalenschätzung

In [8]:
NORMAL_RADIUS = 2.0
NORMAL_MAX_NN = 30

# Estimate normals on the downsampled cloud; apply() returns (cloud, step report).
n = NormalEstimator(radius=NORMAL_RADIUS, max_nn=NORMAL_MAX_NN, orient_mode='consistent')
pcd_normals, step_report = n.apply(pcd_voxel)

print(f'Punkte:       {step_report.points_after:,}  (unverändert – keine Punkte entfernt)')
print(f'Hat Normalen: {pcd_normals.has_normals()}')

# Visualise the normals as RGB colours.
normals = np.asarray(pcd_normals.normals)
pts     = np.asarray(pcd_normals.points)
# Draw at most VIZ_MAX_POINTS indices with a fixed seed; the same indices are
# used for coordinates and normals so each marker keeps its own normal.
n_shown = min(VIZ_MAX_POINTS, len(pts))
idx = np.random.default_rng(42).choice(len(pts), n_shown, replace=False)
# Linear map x -> (x + 1) / 2, i.e. [-1, 1] -> [0, 1] (assumes unit-length normals).
colors = (normals[idx] + 1.0) / 2.0
color_hex = ['rgb({},{},{})'.format(*(int(c * 255) for c in rgb)) for rgb in colors]

normal_trace = go.Scatter3d(
    x=pts[idx, 0], y=pts[idx, 1], z=pts[idx, 2],
    mode='markers',
    marker={'size': 2, 'color': color_hex, 'opacity': 0.8},
    name='Normalen (RGB = XYZ-Richtung)',
)
fig = go.Figure(normal_trace)
fig.update_layout(
    title_text='Normalenschätzung – Farbe zeigt Normalenrichtung',
    height=600,
    scene={'aspectmode': 'data'},
    margin={'l': 0, 'r': 0, 't': 40, 'b': 0},
)
fig.show()

Punkte:       7,978  (unverändert – keine Punkte entfernt)
Hat Normalen: True


## 6 – Komplette Pipeline (aus pipeline.yaml)

In [9]:
# Build the pipeline from the YAML configuration and run it on the raw cloud
# (not on the intermediate results of the manual steps above).
pipeline = PreprocessingPipeline.from_config(CONFIG, source_type='ideal')
print(f'Pipeline: {pipeline}')

pcd_final, report = pipeline.process(pcd_raw)

# Overall summary of the run.
print('\nReport:')
print(f'  Punkte rein:  {report.points_in:,}')
print(f'  Punkte raus:  {report.points_out:,}')
print(f'  Retention:    {report.total_retention_rate:.1%}')
print(f'  Gesamt-Zeit:  {report.total_duration_ms:.0f}ms')
print()
# One line per pipeline step: name, point counts before/after, removed, duration.
for s in report.steps:
    step_counts = f'  {s.step_name:<35} {s.points_before:>8,} → {s.points_after:>8,}  '
    step_cost = f'(−{s.points_removed:,}, {s.duration_ms:.1f}ms)'
    print(step_counts + step_cost)

Pipeline: PreprocessingPipeline(source_type='ideal', steps=['voxel_grid_downsampler', 'normal_estimator'])

Report:
  Punkte rein:  8,192
  Punkte raus:  8,172
  Retention:    99.8%
  Gesamt-Zeit:  670ms

  voxel_grid_downsampler                 8,192 →    8,172  (−20, 1.0ms)
  normal_estimator                       8,172 →    8,172  (−0, 668.8ms)


In [10]:
# Compare the raw cloud with the output of the complete pipeline.
plot_before_after(
    pcd_before=pcd_raw,
    pcd_after=pcd_final,
    title='Komplette Pipeline: Roh vs. Preprocessed',
    color_before='steelblue',
    color_after='tomato',
)

## 7 – Report als Tabelle und Balkendiagramm

In [11]:
# First row represents the untouched input. Its retention uses the same
# `.1%` format as the step rows so table and bar labels read consistently
# ('100.0%' rather than a hand-written '100%').
rows = [{'Step': 'roh (input)', 'Punkte': report.points_in, 'Entfernt': 0,
         'Retention': f'{1.0:.1%}', 'Zeit (ms)': 0}]
# One row per pipeline step, taken from the step reports.
for s in report.steps:
    rows.append({
        'Step':       s.step_name.replace('_', ' '),
        'Punkte':     s.points_after,
        'Entfernt':   s.points_removed,
        'Retention':  f'{s.retention_rate:.1%}',
        'Zeit (ms)':  round(s.duration_ms, 1),
    })

# Build the frame once from the collected records and render it richly.
df = pd.DataFrame(rows)
display(df)

# Bar height and colour both encode the point count; the retention string is
# drawn above each bar.
fig = px.bar(
    df, x='Step', y='Punkte',
    title='Punktanzahl nach jedem Preprocessing-Step',
    color='Punkte', color_continuous_scale='Blues',
    text='Retention',
)
fig.update_traces(textposition='outside')
fig.update_layout(height=400, showlegend=False)
fig.show()

Unnamed: 0,Step,Punkte,Entfernt,Retention,Zeit (ms)
0,roh (input),8192,0,100%,0.0
1,voxel grid downsampler,8172,20,99.8%,1.0
2,normal estimator,8172,0,100.0%,668.8


## 8 – Als WeldVolumeModel speichern

In [12]:
# Target directory handed to WeldVolumeModel.save().
output_dir = Path('../data/processed/notebook_exploration')

# Bundle the preprocessed cloud and its report under the STEP file's stem as id.
model = WeldVolumeModel(
    model_id=STEP_FILE.stem,
    source_type='ideal',
    source_file=STEP_FILE,
    point_cloud=pcd_final,
    preprocessing_report=report,
)

# save() returns the location that was written; print it with the model summary.
save_path = model.save(output_dir)
print(f'Gespeichert: {save_path}')
print(f'Modell:      {model}')

Gespeichert: ../data/processed/notebook_exploration/DVS-Demo
Modell:      WeldVolumeModel(id='DVS-Demo', type='ideal', points=8,172, preprocessed=True, segmented=False)
