# nnU-Net v2 (KSSD2025) — Hugging Face Space Notebook

This notebook is structured for **Hugging Face Spaces (JupyterLab)**.
It:
1) Downloads the dataset from a Hugging Face dataset repo  
2) Converts TIFF → NIfTI (.nii.gz)  
3) Builds `nnUNet_raw/Dataset501_KSSD`  
4) Runs `plan_and_preprocess`  
5) Trains a selected fold

> **Tip:** For Spaces, enable **Persistent Storage** so that everything written under `/data` (the downloaded dataset, the preprocessed data, and the training checkpoints) is retained across restarts.


In [None]:
# =========================
# Cell 1 — Install deps
# =========================
!pip -q install nnunetv2 nibabel SimpleITK acvl-utils tifffile huggingface_hub

import os
from pathlib import Path


In [None]:
# =========================
# Cell 2 — Paths + download dataset
# =========================
from huggingface_hub import snapshot_download

# Persistent storage path in HF Spaces (recommended).
# Everything this notebook produces lives under BASE.
BASE = Path("/data/nnunet_kssd").resolve()
BASE.mkdir(parents=True, exist_ok=True)

DATASET_REPO = "thecodex/KSSD2025KidneyStoneSegmentationDataset"  # change if your repo differs
LOCAL_DATA = BASE / "kssd_raw_download"

# Download the dataset repo files into LOCAL_DATA.
# NOTE: `local_dir_use_symlinks` is intentionally not passed. It is deprecated
# in current huggingface_hub releases (files downloaded to `local_dir` are
# always real files, and the argument is ignored with a warning); later
# releases may reject it outright. Cell 1 installs the latest version unpinned.
snapshot_download(
    repo_id=DATASET_REPO,
    repo_type="dataset",
    local_dir=str(LOCAL_DATA),
)

# Quick sanity check of what was downloaded (first 20 entries only).
print("Downloaded to:", LOCAL_DATA)
print("Top-level items:", [p.name for p in LOCAL_DATA.iterdir()][:20])


In [None]:
# =========================
# Cell 3 — Build nnU-Net dataset (TIFF → NIfTI)
# =========================
# Converts every image/label TIFF pair found under LOCAL_DATA into a pair of
# NIfTI files laid out the way nnU-Net v2 expects for a raw dataset.
import numpy as np
import tifffile
import nibabel as nib
import shutil

# nnU-Net folder roots (inside persistent storage)
nnUNet_raw = BASE / "nnUNet_raw"
nnUNet_preprocessed = BASE / "nnUNet_preprocessed"
nnUNet_results = BASE / "nnUNet_results"

# Exported into the kernel's environment so that the nnUNetv2_* commands
# launched from later cells inherit them.
os.environ["nnUNet_raw"] = str(nnUNet_raw)
os.environ["nnUNet_preprocessed"] = str(nnUNet_preprocessed)
os.environ["nnUNet_results"] = str(nnUNet_results)

# The folder name is derived from the id so the two cannot drift apart.
# Keep this id in sync with the one passed to the nnUNetv2_* commands below.
dataset_id = 501
dataset_name = f"Dataset{dataset_id:03d}_KSSD"
dataset_dir = nnUNet_raw / dataset_name
imagesTr = dataset_dir / "imagesTr"
labelsTr = dataset_dir / "labelsTr"

# Locate TIFF folders inside the downloaded dataset
# Expected: .../image/*.tif and .../label/*.tif (adjust if your repo layout differs)
img_tifs = sorted(LOCAL_DATA.rglob("image/*.tif"))
lbl_tifs = sorted(LOCAL_DATA.rglob("label/*.tif"))

# Validate the inputs BEFORE touching the output folder, so that a wrong repo
# layout does not wipe a dataset that was built successfully earlier.
# Explicit exceptions are used instead of `assert`, which is stripped under -O.
if not img_tifs:
    raise FileNotFoundError(
        "No image TIFFs found under **image/*.tif**. Check your dataset repo layout."
    )
if not lbl_tifs:
    raise FileNotFoundError(
        "No label TIFFs found under **label/*.tif**. Check your dataset repo layout."
    )
if len(img_tifs) != len(lbl_tifs):
    raise ValueError(f"Image/label count mismatch: {len(img_tifs)} vs {len(lbl_tifs)}")

print(f"Found {len(img_tifs)} image TIFFs and {len(lbl_tifs)} label TIFFs")

# Images and labels are paired purely by sorted position. That is only correct
# when both folders use matching file names, so surface any disagreement
# instead of silently training on mispaired data.
name_mismatches = [
    (img_path.name, lbl_path.name)
    for img_path, lbl_path in zip(img_tifs, lbl_tifs)
    if img_path.stem != lbl_path.stem
]
if name_mismatches:
    print(
        f"WARNING: {len(name_mismatches)} image/label pairs have different file names "
        f"(e.g. {name_mismatches[:3]}). Pairs are matched by sorted order; "
        "verify that this pairing is correct."
    )

# Clean + recreate dataset folder
if dataset_dir.exists():
    shutil.rmtree(dataset_dir)
imagesTr.mkdir(parents=True, exist_ok=True)
labelsTr.mkdir(parents=True, exist_ok=True)

# The source TIFFs carry no spatial metadata that is used here, so every
# volume gets the same identity affine; build it once outside the loop.
identity_affine = np.eye(4)

# Convert each 2D TIFF slice to a 3D NIfTI volume with a singleton Z dim (H,W,1)
# nnU-Net expects:
#  - imagesTr: case_xxxx_0000.nii.gz
#  - labelsTr: case_xxxx.nii.gz
for idx, (it, lt) in enumerate(zip(img_tifs, lbl_tifs), start=1):
    case_id = f"case_{idx:04d}"

    img = tifffile.imread(it)
    if img.ndim == 2:
        img = img[:, :, np.newaxis]
    # Keep the listed dtypes as they are; cast anything else to float32.
    if img.dtype not in (np.float32, np.int16, np.uint16, np.int32):
        img = img.astype(np.float32)

    lbl = tifffile.imread(lt)
    if lbl.ndim == 2:
        lbl = lbl[:, :, np.newaxis]

    # Fail fast with the offending file names rather than letting the
    # mismatch surface later during the dataset integrity check.
    if lbl.shape != img.shape:
        raise ValueError(
            f"Shape mismatch for {case_id}: image {it.name} has shape {img.shape}, "
            f"label {lt.name} has shape {lbl.shape}"
        )

    # binarize labels (0/1)
    lbl = (lbl > 0).astype(np.uint8)

    nib.save(nib.Nifti1Image(img, affine=identity_affine), imagesTr / f"{case_id}_0000.nii.gz")
    nib.save(nib.Nifti1Image(lbl, affine=identity_affine), labelsTr / f"{case_id}.nii.gz")

print("Done. Example files:")
print("imagesTr:", sorted([p.name for p in imagesTr.glob('*.nii.gz')])[:3])
print("labelsTr:", sorted([p.name for p in labelsTr.glob('*.nii.gz')])[:3])


In [None]:
# =========================
# Cell 4 — dataset.json
# =========================
# Writes the dataset description file into the raw dataset folder and echoes
# the first 500 characters back as a sanity check.
import json

# One entry per converted training image in imagesTr.
num_training_cases = sum(1 for _ in imagesTr.glob("*.nii.gz"))

# Built key by key; insertion order determines the order in the JSON file.
dataset_json = {}
dataset_json["channel_names"] = {"0": "CT"}
dataset_json["labels"] = {"background": 0, "stone": 1}
dataset_json["numTraining"] = num_training_cases
dataset_json["file_ending"] = ".nii.gz"

dataset_json_path = dataset_dir / "dataset.json"
serialized = json.dumps(dataset_json, indent=2)
dataset_json_path.write_text(serialized)

print("Wrote:", dataset_json_path)
print(dataset_json_path.read_text()[:500])


In [None]:
# =========================
# Cell 5 — Plan + preprocess
# =========================
# Shell escape: runs the nnU-Net v2 planning + preprocessing command for
# dataset id 501 (the id of Dataset501_KSSD built in Cell 3), with the dataset
# integrity check enabled.
# NOTE: the command runs as a child process of the kernel, so it sees the
# nnUNet_raw / nnUNet_preprocessed / nnUNet_results environment variables only
# if Cell 3 has been run in this kernel session.
!nnUNetv2_plan_and_preprocess -d 501 --verify_dataset_integrity


In [None]:
# =========================
# Cell 6 — Train (choose fold)
# =========================
# FOLD is the fold index and CONFIG the nnU-Net configuration name; both are
# passed as positional arguments to the training command below.
FOLD = 0   # set 0..4
CONFIG = "2d"

# Shell escape: IPython substitutes {CONFIG} and {FOLD} with the values above
# before running the command. 501 is the dataset id used in Cells 3 and 5.
!nnUNetv2_train 501 {CONFIG} {FOLD}
