# YOLO-based Segmentation of Recoater Streaking & Spatter

This notebook fine-tunes a pre-trained YOLOv8 segmentation model to detect recoater streaks and spatter
in each layer of a Laser Powder Bed Fusion (LPBF) build. We then compute per-layer anomaly areas (in mm²) for
downstream analysis.

---

## 1. Setup & Imports

In [2]:
import os

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yaml
from src.utils.yolo_segmentation import (
    batch_predict_and_compute_areas,
    load_hdf5_slice,
    load_hdf5_stack,
    visualize_detections,
)
from ultralytics import YOLO


In [3]:
# --- Data locations ----------------------------------------------------------
# Relative paths are resolved against the current working directory at the
# moment this cell runs.
DATA_PATH = os.path.abspath("../data/2021-04-16 TCR Phase 1 Build 2.hdf5")
CAMERA_PATH = "slices/camera_data/visible/0"  # handed to the HDF5 loaders together with DATA_PATH
IMG_TRAIN_DIR = os.path.abspath("../data/tcr_phase1_build2/images/train")
LBL_TRAIN_DIR = os.path.abspath("../data/tcr_phase1_build2/labels/train")
DATA_YAML = os.path.abspath("data.yaml")  # dataset config file written by a later cell

# --- Anomaly classes ---------------------------------------------------------
# Class id -> class name.
# NOTE(review): data.yaml is built from the *values* only, and Ultralytics
# class indices are 0-based; confirm that ids 1/2 are what the area helper
# expects.
CLASS_MAP = {
    1: "spatter",
    2: "streak",
}
# Presumably the area of one pixel in mm² (per the name); placeholder value,
# adjust to your calibration.
PIXEL_SIZE_MM2 = 0.01

# --- YOLO parameters ---------------------------------------------------------
WEIGHTS = "yolov8s-seg.pt"  # pretrained checkpoint that fine-tuning starts from
EPOCHS = 50
BATCH_SIZE = 8  # training batch size; also the chunk size for batched inference
IMG_SIZE = 640
CONF_THRESH = 0.25  # confidence threshold passed at inference time


In [None]:
# Describe the dataset for Ultralytics and persist the description as YAML.
class_names = list(CLASS_MAP.values())
cfg = dict(
    train=IMG_TRAIN_DIR,
    val=IMG_TRAIN_DIR,  # same images as training; replace with a real val split if you have one
    nc=len(CLASS_MAP),
    names=class_names,
)
with open(DATA_YAML, "w") as yaml_out:
    yaml.dump(cfg, yaml_out)
print("Wrote", DATA_YAML)


In [None]:
# Fine-tune the pretrained segmentation checkpoint on the dataset described
# by DATA_YAML.
model = YOLO(WEIGHTS)
model.train(
    data=DATA_YAML,
    epochs=EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH_SIZE,
    project="runs/segment",
    name="yolo_spatter_streak",
    # Reuse the same run directory on every execution. Without this,
    # Ultralytics appends a numeric suffix to `name` when the directory
    # already exists, and the fixed
    # "runs/segment/yolo_spatter_streak/weights/best.pt" path loaded by the
    # next cell would keep pointing at the weights from the first run.
    exist_ok=True,
)

In [None]:
# Load the best checkpoint written by the training run.
finetuned = YOLO("runs/segment/yolo_spatter_streak/weights/best.pt")

# Sanity check on layer 0. load_hdf5_slice receives the file *path*, so no
# separate h5py.File handle needs to be opened (or kept open) here.
img0 = load_hdf5_slice(DATA_PATH, 0, CAMERA_PATH)
res0 = finetuned(img0, imgsz=IMG_SIZE, conf=CONF_THRESH)
vis0 = visualize_detections(img0, res0)

# Explicit figure/axes; plt.show() keeps the Text repr out of the cell output.
fig, ax = plt.subplots()
ax.imshow(vis0)
ax.axis("off")
ax.set_title("Layer 0 - Fine-tuned")
plt.show()

In [None]:
# 1) load full stack into memory (or chunk manually if too big)
stack = load_hdf5_stack(DATA_PATH, CAMERA_PATH)  # expected shape (N,H,W,3) — TODO confirm against loader
N = stack.shape[0]

# Look the class ids up by name in CLASS_MAP instead of repeating them as
# magic numbers, so the ids have a single source of truth.
# NOTE(review): data.yaml lists only the class *names*, so the trained model
# most likely emits 0-based indices (0 = spatter, 1 = streak). If
# batch_predict_and_compute_areas filters on the model's indices, these
# CLASS_MAP keys (1 and 2) are off by one — confirm against the helper.
class_id_by_name = {name: class_id for class_id, name in CLASS_MAP.items()}
streak_ids = [class_id_by_name["streak"]]
spatter_ids = [class_id_by_name["spatter"]]

# 2) preallocate one area value per layer
recoater_areas = np.zeros(N, dtype=float)
spatter_areas  = np.zeros(N, dtype=float)

# 3) run in batches
for start in range(0, N, BATCH_SIZE):
    # Clamp the final batch so slices and the progress message stay within N.
    stop = min(start + BATCH_SIZE, N)
    batch = stack[start:stop]
    # The helper is called once per class on the same batch (presumably
    # running inference each time — TODO confirm).
    recoater_areas[start:stop] = batch_predict_and_compute_areas(
        finetuned, batch, streak_ids, PIXEL_SIZE_MM2, imgsz=IMG_SIZE, conf=CONF_THRESH
    )
    spatter_areas[start:stop] = batch_predict_and_compute_areas(
        finetuned, batch, spatter_ids, PIXEL_SIZE_MM2, imgsz=IMG_SIZE, conf=CONF_THRESH
    )
    # Report the inclusive range of layers that was actually processed.
    print(f"Processed layers {start}–{stop - 1}")

# 4) assemble DataFrame
df = pd.DataFrame({
    "layer": np.arange(N),
    "recoater_mm2": recoater_areas,
    "spatter_mm2": spatter_areas,
})
df.head()


In [None]:
# Per-layer anomaly area for both classes on a shared axis.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(df["layer"], df["recoater_mm2"], label="Recoater")
ax.plot(df["layer"], df["spatter_mm2"], label="Spatter")
ax.set_xlabel("Layer")
ax.set_ylabel("Anomaly Area (mm²)")
ax.legend()
ax.set_title("Per-Layer Anomaly Areas")
plt.show()