The dataset.json file is required in the transformer-based experiment:


# CT data experiment:

In [None]:
import json
import os
from sklearn.model_selection import train_test_split

# ----------------------------
# Define paths
# ----------------------------
# Root of the CT dataset; the four sub-folders hold the training/test
# images and their labels.
base_dir = "./ct_dataset"
images_tr_dir, labels_tr_dir, images_ts_dir, labels_ts_dir = (
    os.path.join(base_dir, subfolder)
    for subfolder in ("imagesTr", "labelsTr", "imagesTs", "labelsTs")
)

# ----------------------------
# Helper function to match images and labels
# ----------------------------
def create_file_pairs(images_dir, labels_dir):
    """Pair each image volume with the label volume of the same file name.

    Args:
        images_dir: Directory containing the ``.nii.gz`` image volumes.
        labels_dir: Directory containing the ``.nii.gz`` label volumes.

    Returns:
        A list of ``{"image": path, "label": path}`` dicts ordered by file
        name. Images without an identically named label are left out.

    Raises:
        OSError: If either directory cannot be listed by ``os.listdir``.
    """
    image_names = sorted(f for f in os.listdir(images_dir) if f.endswith(".nii.gz"))
    # A set keeps the per-image label lookup O(1) instead of scanning a list.
    label_names = {f for f in os.listdir(labels_dir) if f.endswith(".nii.gz")}
    return [
        {
            "image": os.path.join(images_dir, name),
            "label": os.path.join(labels_dir, name),
        }
        for name in image_names
        if name in label_names
    ]

# ----------------------------
# Prepare training and validation data
# ----------------------------
# Hold out a fixed fraction of the paired training cases for validation;
# the fixed seed keeps the split reproducible between runs.
VAL_FRACTION = 0.2
SPLIT_SEED = 42

data_list_tr = create_file_pairs(images_tr_dir, labels_tr_dir)
train_data, val_data = train_test_split(
    data_list_tr,
    test_size=VAL_FRACTION,
    random_state=SPLIT_SEED,
)

# ----------------------------
# Prepare test data
# ----------------------------
# The test folders are paired as-is, without any further splitting.
data_list_ts = create_file_pairs(images_ts_dir, labels_ts_dir)

# ----------------------------
# Create dataset dictionary
# ----------------------------
# Dataset description written to disk: static metadata first, then the
# per-split case counts, then the image/label pairs of each split.
dataset_json = dict(
    name="Amos22_CT_Dataset",
    description="CT-Dataset ...",
    tensorImageSize="4D",
    modality={"0": "CT"},
    labels={"0": "background", "1": "organ"},
    numTraining=len(train_data),
    numValidation=len(val_data),
    numTest=len(data_list_ts),
    training=train_data,
    validation=val_data,
    test=data_list_ts,
)

# ----------------------------
# Save JSON
# ----------------------------
# The split description is written next to the data, inside base_dir.
output_json_path = os.path.join(base_dir, "dataset_split.json")
with open(output_json_path, "w") as out_file:
    out_file.write(json.dumps(dataset_json, indent=4))

print(f"Dataset JSON saved at {output_json_path}")

# PI-CAI bpMRI experiment:

In [None]:
import os
import json
from sklearn.model_selection import train_test_split

# ----------------------------
# Dataset directories
# ----------------------------
# Root of the PI-CAI dataset; images and labels live in two sub-folders.
data_dir = "./workdir/nnUNet_raw/Dataset104_picai"
images_dir, labels_dir = [
    os.path.join(data_dir, subfolder) for subfolder in ("imagesTr", "labelsTr")
]

# ----------------------------
# List and process cases
# ----------------------------
# Each case is identified by its first-channel file "<case>_0000.nii.gz".
# The whole suffix (12 characters) has to be removed to obtain the bare case
# identifier; cutting fewer characters leaves a stray "_00" on every case,
# and the image/label paths derived from it would not exist on disk.
first_channel_suffix = "_0000.nii.gz"
cases = sorted(
    f[: -len(first_channel_suffix)]
    for f in os.listdir(images_dir)
    if f.endswith(first_channel_suffix)
)

# ----------------------------
# Split into training and validation (80/20)
# ----------------------------
# Reproducible 80/20 split; each side is sorted afterwards so the case
# order in the output does not depend on the shuffle.
shuffled_train, shuffled_val = train_test_split(
    cases,
    test_size=0.2,
    random_state=42,
)
train_cases = sorted(shuffled_train)
val_cases = sorted(shuffled_val)

# ----------------------------
# Modality suffix map
# ----------------------------
# Each modality maps to its file-name suffix: an underscore followed by the
# zero-padded, four-digit channel index.
suffix_map = {
    modality: f"_{channel:04d}"
    for channel, modality in enumerate(("T2W", "ADC", "HBV"))
}

# ----------------------------
# Build dataset JSON structure
# ----------------------------
def build_dataset_entry(cases_list):
    """Build one image/label record per case identifier.

    Reads the module-level ``images_dir``, ``labels_dir`` and ``suffix_map``.

    Args:
        cases_list: Iterable of case identifiers (file names without the
            channel suffix and extension).

    Returns:
        A list of dicts. ``"image"`` holds the three channel paths in
        T2W, ADC, HBV order; ``"label"`` holds the label path.
    """
    channel_order = ("T2W", "ADC", "HBV")
    entries = []
    for case in cases_list:
        image_paths = []
        for mod in channel_order:
            image_paths.append(
                os.path.join(images_dir, f"{case}{suffix_map[mod]}.nii.gz")
            )
        label_path = os.path.join(labels_dir, f"{case}.nii.gz")
        entries.append({"image": image_paths, "label": label_path})
    return entries

# Expand both case lists into per-case image/label records first, then
# assemble the dataset description around them.
training_entries = build_dataset_entry(train_cases)
validation_entries = build_dataset_entry(val_cases)

dataset_dict = {
    "channel_names": {"0": "T2W", "1": "ADC", "2": "HBV"},
    "labels": {"background": 0, "lesion": 1},
    "numTraining": len(train_cases),
    "file_ending": ".nii.gz",
    "name": "picai_nnunetv2",
    "reference": "none",
    "release": "1.0",
    "description": "bpMRI scans from PI-CAI dataset to train by nnUNetv2",
    "overwrite_image_reader_writer": "SimpleITKIO",
    "training": training_entries,
    "validation": validation_entries,
}

# ----------------------------
# Save JSON file
# ----------------------------
# The description is stored as dataset.json inside the dataset folder.
output_path = os.path.join(data_dir, "dataset.json")
with open(output_path, "w") as out_file:
    out_file.write(json.dumps(dataset_dict, indent=4))

print(f"Dataset JSON saved: {output_path}")