# YOLO-based Segmentation of Recoater Streaking & Spatter

This notebook fine-tunes a pre-trained YOLOv8 segmentation model to detect recoater streaks and spatter
in each layer of a Laser Powder Bed Fusion (LPBF) build. It then computes per-layer anomaly areas for
downstream analysis.

---

## 1. Setup & Imports

In [1]:
import os

import cv2
import h5py
import imageio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yaml
from src.utils.yolo_segmentation import (
    batch_predict_and_compute_areas,
    load_hdf5_slice,
    load_hdf5_stack,
    load_yolo_model,
    visualize_detections,
)
from ultralytics import YOLO


In [2]:
# --- Filesystem locations ---------------------------------------------------
# Relative paths are resolved against the kernel's current working directory.
DATA_PATH = os.path.abspath("../data/2021-07-13 TCR Phase 1 Build 1.hdf5")
IMG_TRAIN_DIR = os.path.abspath("../data/images/train")
LBL_TRAIN_DIR = os.path.abspath("../data/labels/train")
DATA_YAML = os.path.abspath("data.yaml")

# Key of the camera image dataset inside the HDF5 file.
CAMERA_PATH = "slices/camera_data/visible/0"

# --- Anomaly classes --------------------------------------------------------
# Keys are the pixel values used in the exported label masks (0 = background).
CLASS_MAP = {
    1: "spatter",
    2: "streak",
}
PIXEL_SIZE_MM2 = 0.01  # presumably the area of one pixel in mm^2; adjust to your calibration

# --- YOLO parameters --------------------------------------------------------
WEIGHTS = "yolov8s-seg.pt"
EPOCHS, BATCH_SIZE = 50, 8
IMG_SIZE = 640
CONF_THRESH = 0.25


In [8]:
# Dataset description consumed by the YOLO trainer: image locations plus the
# number and names of the classes. The file is rewritten each time this runs.
cfg = dict(
    train=IMG_TRAIN_DIR,
    val=IMG_TRAIN_DIR,  # replace with real val split if you have one
    nc=len(CLASS_MAP),
    names=list(CLASS_MAP.values()),
)
with open(DATA_YAML, "w") as fh:
    # Serialise to text first, then write it out in one go.
    fh.write(yaml.dump(cfg))
print("Wrote", DATA_YAML)


Wrote /piml-in-metal-am/notebooks/data.yaml


## 2. Prepare Data for YOLO Training

Before training the YOLO model, you need to prepare your dataset. This involves exporting images and their corresponding segmentation masks (labels) from the HDF5 file into a format that YOLO can understand (typically PNG files for images and masks).

The script `src/utils/export_yolo_training_data.py` has been created to handle this process.

**Run the export script from your project root directory in the terminal:**
```bash
python src/utils/export_yolo_training_data.py
```

This script will populate the `data/images/train` and `data/labels/train` directories, which are referenced by the `data.yaml` file used by YOLO for training.

**Make sure you have run this script successfully before proceeding to the training step below.**

In [None]:
# Export every camera layer as a PNG image plus a PNG label mask with the same
# file name. Mask pixel values: 0 = background, 1 = spatter, 2 = streak
# (streak is written last, so it wins where the two masks overlap).
# NOTE(review): Ultralytics segmentation training reads per-image .txt polygon
# labels; presumably these PNG masks are converted in a later step — confirm.

# Ensure output directories exist
os.makedirs(IMG_TRAIN_DIR, exist_ok=True)
os.makedirs(LBL_TRAIN_DIR, exist_ok=True)
print(f"Ensured training image directory exists: {IMG_TRAIN_DIR}")
print(f"Ensured training label directory exists: {LBL_TRAIN_DIR}")

# HDF5 dataset keys of the two segmentation results used as labels.
spatter_key = "slices/segmentation_results/8"  # HDF5 class 8 = spatter
streak_key = "slices/segmentation_results/3"   # HDF5 class 3 = streak

# Export images and masks
print(f"Reading HDF5 data from: {DATA_PATH}")
exported_n_layers = 0  # stays 0 unless the export loop is reached
if not os.path.exists(DATA_PATH):
    print(f"ERROR: HDF5 file not found at {DATA_PATH}. Please check the DATA_PATH variable.")
else:
    with h5py.File(DATA_PATH, "r") as h5:
        if CAMERA_PATH not in h5:
            print(f"ERROR: CAMERA_PATH '{CAMERA_PATH}' not found in HDF5 file.")
        elif spatter_key not in h5 or streak_key not in h5:
            print("ERROR: Segmentation result paths for spatter (8) or streak (3) not found in HDF5 file.")
        else:
            # Resolve the dataset handles once instead of on every iteration.
            spatter_dset = h5[spatter_key]
            streak_dset = h5[streak_key]
            exported_n_layers = h5[CAMERA_PATH].shape[0]
            print(f"Exporting {exported_n_layers} layers as images and masks...")
            for layer in range(exported_n_layers):
                img = load_hdf5_slice(DATA_PATH, layer, CAMERA_PATH)
                imageio.imwrite(os.path.join(IMG_TRAIN_DIR, f"{layer:05d}.png"), img)

                label = np.zeros(img.shape[:2], dtype=np.uint8)  # Background = 0
                # Spatter (HDF5 class 8) -> YOLO class 0 (pixel value 1 in mask)
                sp_mask = spatter_dset[layer].astype(bool)
                label[sp_mask] = 1
                # Streak (HDF5 class 3) -> YOLO class 1 (pixel value 2 in mask)
                st_mask = streak_dset[layer].astype(bool)
                label[st_mask] = 2
                imageio.imwrite(os.path.join(LBL_TRAIN_DIR, f"{layer:05d}.png"), label)
            print(f"Export complete: {exported_n_layers} layers exported to {IMG_TRAIN_DIR} and {LBL_TRAIN_DIR}")

In [None]:
# Verify export
def _report_dir_contents(path, kind):
    """Print a summary of the entries in ``path`` and return their names.

    Args:
        path: Directory expected to hold exported training files.
        kind: Plural noun used in the preview line ("images" or "labels").

    Returns:
        Sorted list of entry names; empty when ``path`` is missing, is not a
        directory, or contains nothing.
    """
    print(f"Checking content of {path} (expected for training)...")
    # isdir() is already False for a missing path, so no separate exists() check.
    if not os.path.isdir(path):
        print(f"ERROR: {path} does not exist or is not a directory. Training will fail.")
        return []
    # os.listdir returns entries in arbitrary order; sort so the preview is stable.
    entries = sorted(os.listdir(path))
    print(f"Found {len(entries)} files in {path}.")
    if entries:
        print(f"First 5 {kind}: {entries[:5]}")
    else:
        print(f"WARNING: {path} is empty. Training will likely fail.")
    return entries


train_images_found = _report_dir_contents(IMG_TRAIN_DIR, "images")
train_labels_found = _report_dir_contents(LBL_TRAIN_DIR, "labels")

# Check if the number of images and labels match expected output from export cell.
# exported_n_layers only exists if an export cell ran in this kernel session.
have_expected = "exported_n_layers" in globals()
if have_expected and exported_n_layers > 0:
    if len(train_images_found) == exported_n_layers and len(train_labels_found) == exported_n_layers:
        print(f"Number of images ({len(train_images_found)}) and labels ({len(train_labels_found)}) matches expected number of layers ({exported_n_layers}).")
    else:
        print(f"WARNING: Mismatch after export! Expected: {exported_n_layers}, Found images: {len(train_images_found)}, Found labels: {len(train_labels_found)}")
elif have_expected and exported_n_layers == 0:
    print("NOTE: Export cell reported 0 layers exported. Check HDF5 content and paths if this is unexpected.")
else:
    print("WARNING: 'exported_n_layers' variable not found from export cell. Cannot confirm counts against expected.")

In [10]:
# Ensure output directories exist and export images/labels for training.
# Each camera layer becomes a PNG image plus a PNG label mask with the same
# file name. Mask pixel values: 0 = background, 1 = spatter, 2 = streak
# (streak is written last, so it wins where the two masks overlap).
# NOTE(review): Ultralytics segmentation training reads per-image .txt polygon
# labels; presumably these PNG masks are converted in a later step — confirm.
print(f"Target training image directory: {IMG_TRAIN_DIR}")
print(f"Target training label directory: {LBL_TRAIN_DIR}")
os.makedirs(IMG_TRAIN_DIR, exist_ok=True)
os.makedirs(LBL_TRAIN_DIR, exist_ok=True)

# HDF5 keys of the segmentation results used as labels.
seg_root = "slices/segmentation_results"
spatter_key = f"{seg_root}/8"  # HDF5 class 8 = spatter
streak_key = f"{seg_root}/3"   # HDF5 class 3 = streak

print(f"Reading HDF5 data from: {DATA_PATH}")
exported_n_layers = 0  # stays 0 unless the export loop is reached
if not os.path.exists(DATA_PATH):
    print(f"ERROR: HDF5 file not found at {DATA_PATH}. Please check the DATA_PATH variable and ensure the file exists.")
else:
    try:
        with h5py.File(DATA_PATH, "r") as h5:
            if CAMERA_PATH not in h5:
                top_level_keys = list(h5.keys())
                print(f"ERROR: CAMERA_PATH '{CAMERA_PATH}' not found in HDF5 file. Available top-level keys: {top_level_keys}")
            elif spatter_key not in h5 or streak_key not in h5:
                print("ERROR: Segmentation result paths for spatter (HDF5 class 8) or streak (HDF5 class 3) not found in HDF5 file.")
                if seg_root in h5:
                    # Built outside the f-string: reusing the enclosing quote
                    # character inside a replacement field needs Python 3.12+.
                    available_classes = list(h5[seg_root].keys())
                    print(f"Available segmentation classes: {available_classes}")
            else:
                # Resolve the dataset handles once instead of on every iteration.
                spatter_dset = h5[spatter_key]
                streak_dset = h5[streak_key]
                exported_n_layers = h5[CAMERA_PATH].shape[0]
                print(f"Exporting {exported_n_layers} layers as images and masks...")
                for layer in range(exported_n_layers):
                    img = load_hdf5_slice(DATA_PATH, layer, CAMERA_PATH)
                    imageio.imwrite(os.path.join(IMG_TRAIN_DIR, f"{layer:05d}.png"), img)

                    label = np.zeros(img.shape[:2], dtype=np.uint8)  # Background = 0
                    # Spatter (HDF5 class 8) -> YOLO class 0 (pixel value 1 in mask)
                    sp_mask = spatter_dset[layer].astype(bool)
                    label[sp_mask] = 1
                    # Streak (HDF5 class 3) -> YOLO class 1 (pixel value 2 in mask)
                    st_mask = streak_dset[layer].astype(bool)
                    label[st_mask] = 2
                    imageio.imwrite(os.path.join(LBL_TRAIN_DIR, f"{layer:05d}.png"), label)
                print(f"Export complete: {exported_n_layers} layers exported to {IMG_TRAIN_DIR} and {LBL_TRAIN_DIR}")
    except Exception as e:
        # Best-effort cell: report the failure instead of aborting the notebook.
        print(f"An error occurred during HDF5 processing or file export: {e}")

Ultralytics 8.3.129 🚀 Python-3.12.10 torch-2.7.0+cu126 CPU (Intel Core(TM) i7-10750H 2.60GHz)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/piml-in-metal-am/notebooks/data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s-seg.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolo_spatter_streak2, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, 

RuntimeError: Dataset '/piml-in-metal-am/notebooks/data.yaml' error ❌ Dataset '/piml-in-metal-am/notebooks/data.yaml' images not found, missing path '/piml-in-metal-am/data/images/train'
Note dataset download directory is '/piml-in-metal-am/notebooks/datasets'. You can update this in '/home/vscode/.config/Ultralytics/settings.json'

In [None]:
# Load the fine-tuned weights and sanity-check them on the first layer.
# NOTE(review): the training log in this notebook shows exist_ok=False and
# name=yolo_spatter_streak2, so the latest run may live in a suffixed
# directory — confirm this path points at the intended run.
finetuned = YOLO("runs/segment/yolo_spatter_streak/weights/best.pt")

# test on layer 0
# load_hdf5_slice takes the file path itself, so no HDF5 handle is opened here.
img0 = load_hdf5_slice(DATA_PATH, 0, CAMERA_PATH)
res0 = finetuned(img0, imgsz=IMG_SIZE, conf=CONF_THRESH)
vis0 = visualize_detections(img0, res0)

plt.imshow(vis0)
plt.axis("off")
plt.title("Layer 0 - Fine-tuned")
plt.show()

In [None]:
# Compute per-layer recoater-streak and spatter areas with the fine-tuned
# model, processing the image stack BATCH_SIZE layers at a time.

# 1) load full stack into memory (or chunk manually if too big)
stack = load_hdf5_stack(DATA_PATH, CAMERA_PATH)  # expected shape (N,H,W,3) — TODO confirm
N = stack.shape[0]

# 2) preallocate
recoater_areas = np.zeros(N, dtype=float)
spatter_areas  = np.zeros(N, dtype=float)

# 3) run in batches
# The class ids passed below match the CLASS_MAP keys (1 = spatter, 2 = streak).
# NOTE(review): a model trained from data.yaml with nc=2 predicts class indices
# 0 and 1 — confirm which convention batch_predict_and_compute_areas expects.
for i in range(0, N, BATCH_SIZE):
    stop = min(i + BATCH_SIZE, N)  # exclusive end, clipped for the last partial batch
    batch = stack[i:stop]
    recoater_areas[i:stop] = batch_predict_and_compute_areas(
        finetuned, batch, [2], PIXEL_SIZE_MM2, imgsz=IMG_SIZE, conf=CONF_THRESH
    )
    spatter_areas[i:stop] = batch_predict_and_compute_areas(
        finetuned, batch, [1], PIXEL_SIZE_MM2, imgsz=IMG_SIZE, conf=CONF_THRESH
    )
    # Report the inclusive index range that was actually processed.
    print(f"Processed layers {i}–{stop - 1}")

# 4) assemble DataFrame
df = pd.DataFrame({
    "layer": np.arange(N),
    "recoater_mm2": recoater_areas,
    "spatter_mm2": spatter_areas,
})
df.head()


In [None]:
# Draw both per-layer area curves on one shared axis.
fig, ax = plt.subplots(figsize=(10, 4))
for column, legend_label in (("recoater_mm2", "Recoater"), ("spatter_mm2", "Spatter")):
    ax.plot(df["layer"], df[column], label=legend_label)
ax.set_xlabel("Layer")
ax.set_ylabel("Anomaly Area (mm²)")
ax.legend()
ax.set_title("Per-Layer Anomaly Areas")
plt.show()


In [3]:
# Load the checkpoint named in the configuration cell. The previous name,
# MODEL_WEIGHTS, is not defined anywhere in this notebook and raised a
# NameError on a fresh kernel; the configuration cell calls it WEIGHTS.
model = load_yolo_model(WEIGHTS)

In [5]:
# Per-layer inference with `model`: segment each camera layer, then convert
# the recoater and spatter masks into areas using PIXEL_SIZE_MM2.
# NOTE(review): run_yolo_segmentation, extract_anomaly_mask,
# compute_anomaly_area, RECOATER_CLASS and SPATTER_CLASS are not imported or
# defined in the visible cells — confirm they are available (presumably from
# src.utils.yolo_segmentation) before running on a fresh kernel.

# Open the file only long enough to read the layer count, then close it.
with h5py.File(DATA_PATH, "r") as h5:
    n_layers = len(h5[CAMERA_PATH])

areas = []
for layer in range(n_layers):
    img = load_hdf5_slice(DATA_PATH, layer, CAMERA_PATH)
    results = run_yolo_segmentation(model, img)
    mask_shape = img.shape[:2]
    ar = compute_anomaly_area(
        extract_anomaly_mask(results, RECOATER_CLASS, mask_shape),
        pixel_size_mm2=PIXEL_SIZE_MM2)
    sp = compute_anomaly_area(
        extract_anomaly_mask(results, SPATTER_CLASS, mask_shape),
        pixel_size_mm2=PIXEL_SIZE_MM2)
    areas.append({"layer": layer, "recoater_mm2": ar, "spatter_mm2": sp})

# Build the frame once from the collected records.
df = pd.DataFrame(areas)
df.head()



0: 640x640 (no detections), 296.5ms
Speed: 3.8ms preprocess, 296.5ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)
0: 640x640 (no detections), 296.5ms
Speed: 3.8ms preprocess, 296.5ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)


0: 640x640 (no detections), 295.4ms
Speed: 3.5ms preprocess, 295.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)
0: 640x640 (no detections), 295.4ms
Speed: 3.5ms preprocess, 295.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)


0: 640x640 1 clock, 277.5ms
Speed: 3.3ms preprocess, 277.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)
0: 640x640 1 clock, 277.5ms
Speed: 3.3ms preprocess, 277.5ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)


0: 640x640 1 tennis racket, 278.9ms
Speed: 4.5ms preprocess, 278.9ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)
0: 640x640 1 tennis racket, 278.9ms
Speed: 4.5ms preprocess, 278.9ms in

KeyboardInterrupt: 

In [None]:
# Line chart of both anomaly areas against the layer index.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(df["layer"], df["recoater_mm2"], label="Recoater Streaking")
ax.plot(df["layer"], df["spatter_mm2"], label="Spatter")
ax.set(
    xlabel="Layer",
    ylabel="Anomaly Area (mm²)",
    title="Per-Layer Anomaly Areas",
)
ax.legend()
plt.show()

In [None]:
# Write one PNG image and one PNG label mask per camera layer.
# Mask pixel values: 0 = background, 1 = spatter, 2 = streak.
for out_dir in (IMG_TRAIN_DIR, LBL_TRAIN_DIR):
    os.makedirs(out_dir, exist_ok=True)

with h5py.File(DATA_PATH, "r") as h5:
    n_layers = h5[CAMERA_PATH].shape[0]
    for layer in range(n_layers):
        file_name = f"{layer:05d}.png"

        # Raw camera frame for this layer.
        img = load_hdf5_slice(DATA_PATH, layer, CAMERA_PATH)
        imageio.imwrite(f"{IMG_TRAIN_DIR}/{file_name}", img)

        # Combined label mask, starting from an all-background canvas.
        label = np.zeros(img.shape[:2], dtype=np.uint8)
        # HDF5 class 8 (spatter) -> pixel value 1 (for YOLO class 0)
        is_spatter = h5["slices/segmentation_results/8"][layer].astype(bool)
        label[is_spatter] = 1
        # HDF5 class 3 (streak) -> pixel value 2 (for YOLO class 1);
        # assigned second, so it overrides spatter where both are set.
        is_streak = h5["slices/segmentation_results/3"][layer].astype(bool)
        label[is_streak] = 2
        imageio.imwrite(f"{LBL_TRAIN_DIR}/{file_name}", label)
print("Export complete:", n_layers, "layers")
