
# Prepare Roboflow Semantic-Mask Export for YOLOv8-seg

This notebook validates your Roboflow **semantic segmentation mask** export (PNG masks), optionally remaps pixel values (e.g. `255 -> 1`), prepares a YOLOv8-friendly dataset folder, writes a `data.yaml`, and shows a visual overlay example.

**Before running:** update the `ROOT_DIR`, `OUT_DIR`, `NUM_CLASSES`, `CLASS_NAMES`, and `MAPPING` variables in the configuration cell.


In [None]:
import torch, sys, traceback, os, yaml, glob
import ultralytics
from ultralytics import YOLO

from pathlib import Path

import matplotlib.pyplot as plt
from matplotlib.patches import Patch

from PIL import Image
import numpy as np

# Environment report: interpreter version (first line only), PyTorch version,
# whether PyTorch can see CUDA, and the Ultralytics version when the package exposes one.
_environment_report = (
    ("python:", sys.version.splitlines()[0]),
    ("torch:", torch.__version__),
    ("torch.cuda_available:", torch.cuda.is_available()),
    ("ultralytics:", getattr(ultralytics, "__version__", "unknown")),
)
for _label, _value in _environment_report:
    print(_label, _value)


# Locate the prepared dataset description and create the starting model.
# NOTE(review): a later cell reassigns `data_yaml` to a path under yolo_testV2/yolo-dataset and
# recreates `model` from an absolute checkpoint path — confirm which location is the intended one.
data_yaml = '/home/double/Documents/school/yolo_test/yolov8_data/data.yaml'  # Path created by your prep notebook

# Stop right away when the YAML is absent.
if not Path(data_yaml).exists():
    raise FileNotFoundError(f"Dataset YAML not found: {data_yaml}")

# YOLOv8n segmentation checkpoint, referenced by bare file name.
model = YOLO('yolov8n-seg.pt')

In [None]:
# Cell 1 — env & dataset sanity check

# Report the interpreter version (first line only), the PyTorch version, and whether
# PyTorch can see a CUDA device. `sys` is used directly: `os.sys` only works because the
# os module happens to import sys internally and is not a documented attribute.
print("python:", sys.version.splitlines()[0])
print("torch:", torch.__version__)
print("cuda available:", torch.cuda.is_available())

# Path to the data.yaml your prep step created
data_yaml = '/home/double/Documents/school/yolo_testV2/yolo-dataset/data.yaml'

# Fail early with a clear message when the dataset description is missing.
if not os.path.exists(data_yaml):
    raise FileNotFoundError(f"Dataset YAML not found: {data_yaml}")

# Decode as UTF-8 explicitly so parsing does not depend on the platform's locale encoding.
with open(data_yaml, encoding='utf-8') as f:
    cfg = yaml.safe_load(f)

# An empty document parses to None and a top-level list/scalar is not a mapping either;
# reject both here instead of failing later with an AttributeError on cfg.get(...).
if not isinstance(cfg, dict):
    raise ValueError(
        f"Expected a mapping at the top level of {data_yaml}, got {type(cfg).__name__}"
    )
print("data.yaml contents:\n", cfg)

# Quick file counts for the image splits. Every entry is checked exactly as written in
# data.yaml, so a relative entry is resolved against the current working directory
# (absolute paths are the expected layout).
for key in ('train', 'val'):
    p = cfg.get(key)
    # A split may be given as a list of directories; check each entry on its own instead of
    # handing the list to os.path.isdir, which raises TypeError. Missing/empty values are kept
    # as a single entry so they are still reported below.
    entries = p if isinstance(p, (list, tuple)) and p else [p]
    for entry in entries:
        if isinstance(entry, (str, os.PathLike)) and os.path.isdir(entry):
            # '*.*' counts directory entries whose name contains a dot (non-hidden).
            n = len(glob.glob(os.path.join(entry, '*.*')))
            print(f"{key} -> {n} files at {entry}")
        else:
            print(f"{key} -> path missing or not a dir: {entry}")


In [None]:
# Cell 2 — ensure model object exists (recreate if kernel restarted)
# Device selection: 0 when PyTorch reports CUDA as available, otherwise 'cpu'.
if torch.cuda.is_available():
    device = 0
else:
    device = 'cpu'
print("Using device:", device)

# Start from the official tiny segmentation checkpoint, referenced by absolute path.
# NOTE(review): the original comment says the file "will download if missing" — confirm this
# holds when an absolute path is given.
model = YOLO('/home/double/Documents/school/yolo_testV2/yolov8n-seg.pt')


In [None]:

# Same device selection as the model-creation cell: 0 with CUDA, otherwise 'cpu'.
device = 'cpu'
if torch.cuda.is_available():
    device = 0
print("Running YOLO learning on device:", device)

# Keyword arguments forwarded unchanged to model.train().
sanity_cfg = {
    'data': data_yaml,
    'epochs': 70,       # earlier runs tried 200 and 120
    'imgsz': 640,       # reduce to 320 if you get OOM
    'batch': 32,        # set to 1 if OOM
    'workers': 2,
    'device': device,
    'name': 'sanity_check',
    'exist_ok': True,   # overwrite run folder if present
}

# Train, and on failure print the traceback plus troubleshooting hints. Errors are reported,
# not re-raised, so the cell itself finishes without an exception.
try:
    model.train(**sanity_cfg)
    print("\nLearning completed. If you have a val split, validation was run automatically.")
    print("Check runs/segment/sanity_check for logs and weights (best.pt, last.pt).")
except RuntimeError as e:
    print("\nRuntimeError during training:")
    traceback.print_exc()
    msg = str(e).lower()
    # Any message mentioning "out of memory" or "cuda" is treated as a probable memory problem.
    if any(marker in msg for marker in ('out of memory', 'cuda')):
        hint_lines = (
            "\nLikely CUDA OOM. Fixes:",
            "- reduce batch (try batch=1)",
            "- reduce imgsz to 320 or 512",
            "- set device='cpu' for a quick test (very slow)",
        )
    else:
        hint_lines = (
            "\nPossible dataset/format issue. Quick checks:",
            "- ensure data_yaml points to correct image folders",
            "- ensure masks are indexed 0..nc-1 (use np.unique on some masks)",
            "- inspect the notebook output above for loader/augment stacktrace",
        )
    for hint in hint_lines:
        print(hint)
except Exception:
    # Anything that is not a RuntimeError: show the traceback and the unpickling hint.
    print("\nUnexpected error during sanity training:")
    traceback.print_exc()
    print("\nIf this is an UnpicklingError (related to ultralytics.nn.tasks.SegmentationModel), run the allowlist snippet from Cell 2 and re-run Cell 2 & 3.")
