# YOLO-based Segmentation of Recoater Streaking & Spatter

This notebook uses a pre-trained YOLOv8 segmentation model to detect recoater streaks and spatter
in each layer of a Laser Powder Bed Fusion build. We then compute per-layer anomaly areas for
downstream analysis.

---

## 1. Setup & Imports

In [1]:
import os

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yaml
from src.utils.yolo_segmentation import (
    batch_predict_and_compute_areas,
    load_hdf5_slice,
    load_hdf5_stack,
    visualize_detections,
)
from ultralytics import YOLO

In [2]:
BASE = os.path.abspath("../data/tcr_phase1_build2")
DATA_PATH     = os.path.abspath("../data/2021-04-16 TCR Phase 1 Build 2.hdf5") 
CAMERA_PATH   = "slices/camera_data/visible/0"
IMG_TRAIN_DIR = os.path.abspath("../data/tcr_phase1_build2/images/train")
LBL_TRAIN_DIR = os.path.abspath("../data/tcr_phase1_build2/labels/train") 
DATA_YAML     = os.path.abspath("data.yaml") 

# Classes
CLASS_MAP = {1: "spatter", 2: "streak"}
PIXEL_SIZE_MM2 = 0.01  # adjust to your calibration

# YOLO parameters
WEIGHTS      = "yolov8s-seg.pt" # Pretrained weights for initial model
EPOCHS       = 5
BATCH_SIZE   = 8
IMG_SIZE     = 640
CONF_THRESH  = 0.25

In [3]:
cfg = {
    "path": BASE,           # root for both images/ and labels/ (or masks/)
    "train": "images/train", 
    "val":   "images/val",
    "nc":    len(CLASS_MAP),
    "names": list(CLASS_MAP.values()),
}

with open(DATA_YAML, "w") as f:
    yaml.dump(cfg, f, sort_keys=False)
print("Wrote", DATA_YAML)

Wrote /piml-in-metal-am/notebooks/data.yaml


In [None]:
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# model = YOLO(WEIGHTS)

# model.train(
#     data=DATA_YAML,
# 	task="segment",
#     epochs=EPOCHS,
#     imgsz=IMG_SIZE,
#     batch=BATCH_SIZE,
#     project="runs/segment",
#     name="yolo_spatter_streak",
# )

logger.info("Starting YOLO model training...")
try:
    model = YOLO("yolov8n-seg.yaml")
    model.train(
        data=DATA_YAML,
        task="segment",
        epochs=EPOCHS,
        imgsz=IMG_SIZE,
        batch=16,
        workers=8,
        cache=True,       # preload into RAM
        half=True,        # FP16
        rect=True,        # rectangular batches
        freeze=10,        # freeze backbone
        augment=False,    # simple aug only
        project="../runs/segment",
        name="yolo_fast",
    )
    logger.info("Training completed successfully.")
except Exception as e:
    logger.error("An error occurred during training:", exc_info=True)

INFO:__main__:Starting YOLO model training...


New https://pypi.org/project/ultralytics/8.3.133 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.129 🚀 Python-3.12.10 torch-2.7.0+cu126 CPU (Intel Core(TM) i7-10750H 2.60GHz)
Ultralytics 8.3.129 🚀 Python-3.12.10 torch-2.7.0+cu126 CPU (Intel Core(TM) i7-10750H 2.60GHz)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/piml-in-metal-am/notebooks/data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=5, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=10, half=True, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, 

[34m[1mtrain: [0mScanning /piml-in-metal-am/data/tcr_phase1_build2/labels/train.cache... 3027 images, 5 backgrounds, 0 corrupt: 100%|██████████| 3027/3027 [00:00<?, ?it/s]



[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 563.5±51.5 MB/s, size: 480.3 KB)
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 563.5±51.5 MB/s, size: 480.3 KB)


[34m[1mval: [0mScanning /piml-in-metal-am/data/tcr_phase1_build2/labels/val.cache... 534 images, 1 backgrounds, 0 corrupt: 100%|██████████| 534/534 [00:00<?, ?it/s]





[34m[1mval: [0mCaching images (0.2GB RAM):  30%|███       | 162/534 [00:00<00:01, 190.22it/s]



KeyboardInterrupt: 

In [None]:
finetuned = YOLO("runs/segment/yolo_spatter_streak/weights/best.pt")
# test on layer 0
with h5py.File(DATA_PATH, "r") as h5:
    img0 = load_hdf5_slice(DATA_PATH, 0, CAMERA_PATH)
res0 = finetuned(img0, imgsz=IMG_SIZE, conf=CONF_THRESH)
vis0 = visualize_detections(img0, res0)
plt.imshow(vis0); plt.axis("off"); plt.title("Layer 0 - Fine-tuned")

In [None]:
# 1) load full stack into memory (or chunk manually if too big)
stack = load_hdf5_stack(DATA_PATH, CAMERA_PATH)  # shape (N,H,W,3)
N = stack.shape[0]

# 2) preallocate
recoater_areas = np.zeros(N, dtype=float)
spatter_areas  = np.zeros(N, dtype=float)

# 3) run in batches
for i in range(0, N, BATCH_SIZE):
    batch = stack[i : i + BATCH_SIZE]
    recoater_areas[i : i + BATCH_SIZE] = batch_predict_and_compute_areas(
        finetuned, batch, [2], PIXEL_SIZE_MM2, imgsz=IMG_SIZE, conf=CONF_THRESH
    )
    spatter_areas[i : i + BATCH_SIZE] = batch_predict_and_compute_areas(
        finetuned, batch, [1], PIXEL_SIZE_MM2, imgsz=IMG_SIZE, conf=CONF_THRESH
    )
    print(f"Processed layers {i}–{i+BATCH_SIZE}")

# 4) assemble DataFrame
df = pd.DataFrame({
    "layer": np.arange(N),
    "recoater_mm2": recoater_areas,
    "spatter_mm2": spatter_areas,
})
df.head()

In [None]:
plt.figure(figsize=(10,4))
plt.plot(df.layer, df.recoater_mm2, label="Recoater")
plt.plot(df.layer, df.spatter_mm2, label="Spatter")
plt.xlabel("Layer"); plt.ylabel("Anomaly Area (mm²)")
plt.legend(); plt.title("Per-Layer Anomaly Areas")
plt.show()