## Creating the dataset.json file

In [None]:
import os
import json

# === Dataset root: must hold both the 'imagesTr' and 'labelsTr' subfolders ===
root_dir = r"------ INSERT PATH HERE ----------"
# Derive the image and label folders from the root in one step.
images_dir, labels_dir = (
    os.path.join(root_dir, subfolder) for subfolder in ("imagesTr", "labelsTr")
)

# === Gather image-label pairs ===
# Images carry the "_0000" channel suffix; every other .nii.gz file in the
# labels folder is treated as a segmentation.
image_suffix = "_0000.nii.gz"
label_suffix = ".nii.gz"
image_files = sorted(f for f in os.listdir(images_dir) if f.endswith(image_suffix))
label_files = sorted(
    f for f in os.listdir(labels_dir)
    if f.endswith(label_suffix) and not f.endswith(image_suffix)
)

# === Sanity check ===
if len(image_files) != len(label_files):
    print(f"⚠️ Warning: Number of images ({len(image_files)}) and labels ({len(label_files)}) does not match.")

# === Construct training list ===
# Pair files by case identifier (filename without its suffix) instead of by
# list position: a single missing image or label would otherwise shift every
# subsequent pair onto the wrong case without any error.
labels_by_case = {lbl[:-len(label_suffix)]: lbl for lbl in label_files}

training = []
images_without_label = []
matched_cases = set()
for img in image_files:  # iterate images so the output keeps sorted-image order
    case_id = img[:-len(image_suffix)]
    lbl = labels_by_case.get(case_id)
    if lbl is None:
        images_without_label.append(img)
        continue
    matched_cases.add(case_id)
    training.append({
        "image": f"./imagesTr/{img}",
        "label": f"./labelsTr/{lbl}"
    })

# Report anything that could not be paired so the user can fix the dataset.
labels_without_image = [
    lbl for case_id, lbl in labels_by_case.items() if case_id not in matched_cases
]
if images_without_label:
    print(f"⚠️ Warning: {len(images_without_label)} image(s) skipped, no matching label: {images_without_label}")
if labels_without_image:
    print(f"⚠️ Warning: {len(labels_without_image)} label(s) skipped, no matching image: {labels_without_image}")

# === Assemble the contents of dataset.json ===
# Label names are numbered consecutively, with the background as 0.
lobe_labels = {
    label_name: label_id
    for label_id, label_name in enumerate(
        ("background", "RUL", "RML", "RLL", "LUL", "LLL")
    )
}

# Keyword order below is the key order written to the JSON file.
dataset_dict = dict(
    name="LungLobeCombinedDataset122",
    description="Segmentation of lung lobes from preoperative CT scans (combined public dataset).",
    tensorImageSize="3D",
    modality={"0": "CT"},
    labels=lobe_labels,
    numTraining=len(training),
    numTest=0,
    channel_names={"0": "CT"},
    file_ending=".nii.gz",
    training=training,
    test=[],
)

# === Write the dictionary to dataset.json inside the root folder ===
output_path = os.path.join(root_dir, "dataset.json")
with open(output_path, 'w') as json_file:
    # Serialize with 4-space indentation and write the text in one call.
    json_file.write(json.dumps(dataset_dict, indent=4))

print(f"✅ dataset.json created with {len(training)} training samples at:\n{output_path}")
