## Creating the dataset.json file for nnU-Net training

In [None]:
import os
import json

# === Set root path containing both 'imagesTr' and 'labelsTr' ===
root_dir = r"------ INSERT PATH HERE ----------"
images_dir = os.path.join(root_dir, "imagesTr")
labels_dir = os.path.join(root_dir, "labelsTr")

# File naming used below: images are <ID>_0000.nii.gz, labels are <ID>.nii.gz
IMAGE_SUFFIX = "_0000.nii.gz"
LABEL_SUFFIX = ".nii.gz"

# === Gather image and label files ===
image_files = sorted(f for f in os.listdir(images_dir) if f.endswith(IMAGE_SUFFIX))
label_files = sorted(
    f for f in os.listdir(labels_dir)
    if f.endswith(LABEL_SUFFIX) and not f.endswith(IMAGE_SUFFIX)
)

# === Index files by case ID ===
# Pairing by ID (instead of by position in the sorted lists) guarantees that a
# missing or extra file can never shift labels onto the wrong images.
images_by_id = {f[:-len(IMAGE_SUFFIX)]: f for f in image_files}
labels_by_id = {f[:-len(LABEL_SUFFIX)]: f for f in label_files}

common_ids = sorted(images_by_id.keys() & labels_by_id.keys())
labels_without_image = sorted(labels_by_id.keys() - images_by_id.keys())
images_without_label = sorted(images_by_id.keys() - labels_by_id.keys())

# === Sanity check ===
if len(image_files) != len(label_files):
    print(f"⚠️ Warning: Number of images ({len(image_files)}) and labels ({len(label_files)}) does not match.")
if labels_without_image:
    print(f"⚠️ Labels without images (skipped): {labels_without_image}")
if images_without_label:
    print(f"⚠️ Images without labels (skipped): {images_without_label}")
if not common_ids:
    print("⚠️ Warning: No matching image/label pairs found; dataset.json will contain no training samples.")

# === Construct training list (only cases that have both image and label) ===
training = [
    {
        "image": f"./imagesTr/{images_by_id[case_id]}",
        "label": f"./labelsTr/{labels_by_id[case_id]}"
    }
    for case_id in common_ids
]

# === Build dataset dictionary ===
dataset_dict = {
    "name": "LungLobeCombinedDataset134",
    "description": "Segmentation of lung lobes from preoperative CT scans (combined public dataset).",
    "tensorImageSize": "3D",
    "modality": {
        "0": "CT"
    },
    "labels": {
        "background": 0,
        "RUL": 1,
        "RML": 2,
        "RLL": 3,
        "LUL": 4,
        "LLL": 5
    },
    "numTraining": len(training),
    "numTest": 0,
    "channel_names": {
        "0": "CT"
    },
    "file_ending": ".nii.gz",
    "training": training,
    "test": []
}

# === Save to dataset.json ===
output_path = os.path.join(root_dir, "dataset.json")
# Explicit encoding keeps the output independent of the platform default
with open(output_path, 'w', encoding="utf-8") as f:
    json.dump(dataset_dict, f, indent=4)

print(f"✅ dataset.json created with {len(training)} training samples at:\n{output_path}")


## Creating the dataset.json files for the fissure datasets for nnU-Net training

In [None]:
# === Root folder that holds the 'imagesTr' and 'labelsTr' subfolders ===
root_dir = r"------ INSERT PATH HERE ------"
images_dir, labels_dir = (
    os.path.join(root_dir, subfolder) for subfolder in ("imagesTr", "labelsTr")
)

# One entry per fissure: "suffix" is the tag found in label filenames,
# "name" is the text inserted into the dataset name and description.
FISSURES = [
    dict(suffix=tag, name=title)
    for tag, title in (
        ("RHF", "RightHorizontalFissure"),
        ("ROF", "RightObliqueFissure"),
        ("LOF", "LeftObliqueFissure"),
    )
]

def build_dataset_for_fissure(fissure_suffix: str, dataset_name: str):
    """Write ``dataset_<fissure_suffix>.json`` into ``root_dir`` for one fissure.

    Images are the files in ``images_dir`` ending in ``_0000.nii.gz``; labels
    are the files in ``labels_dir`` ending in ``_<fissure_suffix>.nii.gz``.
    Files are paired by the ID left after removing those endings. Images or
    labels without a partner are reported with ``print`` and left out.

    Uses the module-level ``root_dir``, ``images_dir`` and ``labels_dir``.

    Args:
        fissure_suffix: Tag in the label filenames (e.g. ``"RHF"``); also used
            in the output filename.
        dataset_name: Text inserted into the dataset's name and description.
    """
    image_tail = "_0000.nii.gz"
    label_tail = f"_{fissure_suffix}.nii.gz"

    # ID -> filename, e.g. LLS0020 -> LLS0020_0000.nii.gz
    images_by_id = {}
    for fname in sorted(os.listdir(images_dir)):
        if fname.endswith(image_tail):
            images_by_id[fname.replace(image_tail, "")] = fname

    # ID -> filename, e.g. LLS0020 -> LLS0020_RHF.nii.gz
    labels_by_id = {}
    for fname in sorted(os.listdir(labels_dir)):
        if fname.endswith(label_tail):
            labels_by_id[fname.replace(label_tail, "")] = fname

    image_ids = set(images_by_id)
    label_ids = set(labels_by_id)

    # Report anything that cannot be paired
    orphan_labels = sorted(label_ids - image_ids)
    orphan_images = sorted(image_ids - label_ids)
    if orphan_labels:
        print(f"⚠️ [{fissure_suffix}] Labels without images: {orphan_labels}")
    if orphan_images:
        print(f"⚠️ [{fissure_suffix}] Images without labels: {orphan_images}")

    # Only IDs present on both sides become training entries
    training = [
        {
            "image": f"./imagesTr/{images_by_id[case_id]}",
            "label": f"./labelsTr/{labels_by_id[case_id]}"
        }
        for case_id in sorted(image_ids & label_ids)
    ]

    ct_channel = {"0": "CT"}
    dataset_dict = {
        "name": f"LungFissure_{dataset_name}",
        "description": f"Binary segmentation of {dataset_name} (0=background, 1=fissure) on coronal-derived masks.",
        "tensorImageSize": "3D",
        "modality": dict(ct_channel),
        "labels": {"background": 0, "fissure": 1},
        "numTraining": len(training),
        "numTest": 0,
        "channel_names": dict(ct_channel),
        "file_ending": ".nii.gz",
        "training": training,
        "test": []
    }

    out_path = os.path.join(root_dir, f"dataset_{fissure_suffix}.json")
    with open(out_path, "w") as handle:
        json.dump(dataset_dict, handle, indent=4)
    print(f"✅ [{fissure_suffix}] dataset.json created with {len(training)} pairs at:\n{out_path}")

# Generate one dataset JSON per configured fissure
for fissure in FISSURES:
    build_dataset_for_fissure(
        fissure_suffix=fissure["suffix"],
        dataset_name=fissure["name"],
    )
