In [4]:
import os
import json
import pandas as pd

# Dataset locations. The leading "..." in both raw strings is a placeholder
# that has to be replaced with the real prefix on the local machine.
base_dir = r"...\nnUNet\nnUNet_raw\Dataset001_Pancreas"  # root folder of the raw dataset
imagesTr_dir, labelsTr_dir, imagesTs_dir = (
    os.path.join(base_dir, subfolder)
    for subfolder in ("imagesTr", "labelsTr", "imagesTs")
)
classification_labels_file = r"...\classification_labels.csv"  # path to the labels CSV file

# Load the classification label table; later code reads its "filename",
# "subtype" and "split" columns.
classification_labels = pd.read_csv(filepath_or_buffer=classification_labels_file)

# Helper function to create entries
def create_entries(image_dir, label_dir=None, classification_labels=None):
    """Build one dataset entry per row of a classification-label table.

    Args:
        image_dir: Directory that is joined with each row's ``filename`` to
            form the ``"image"`` path.
        label_dir: Optional directory of segmentation labels. When truthy,
            each entry also gets a ``"label"`` path whose file name is the
            image file name with ``"_0000.nii.gz"`` replaced by ``".nii.gz"``
            (presumably stripping the channel-0 suffix -- confirm against the
            dataset layout).
        classification_labels: DataFrame with ``"filename"`` and ``"subtype"``
            columns. ``None`` (the default) yields an empty list.

    Returns:
        A list of dicts with keys ``"image"``, ``"classification_label"``
        (a plain ``int``, so it is JSON-serializable) and, if ``label_dir``
        was given, ``"label"``.

    Raises:
        KeyError: If a required column is missing from the table.
        ValueError: If a ``subtype`` value cannot be converted to ``int``.
    """
    # The signature advertises None as a default, so treat it as "no rows"
    # instead of failing on a method call against None.
    if classification_labels is None:
        return []

    entries = []
    # Iterate the two needed columns directly; this avoids building a Series
    # per row and keeps each column's own dtype.
    rows = zip(classification_labels["filename"], classification_labels["subtype"])
    for filename, subtype in rows:
        entry = {
            "image": os.path.join(image_dir, filename),
            "classification_label": int(subtype),  # numpy integer -> Python int
        }
        if label_dir:
            label_name = filename.replace("_0000.nii.gz", ".nii.gz")
            entry["label"] = os.path.join(label_dir, label_name)
        entries.append(entry)
    return entries

# Create training entries (rows whose "split" column equals "training")
training_labels = classification_labels[classification_labels["split"] == "training"]
training_entries = create_entries(imagesTr_dir, labelsTr_dir, training_labels)

# Create validation entries. These are resolved against the same
# imagesTr/labelsTr folders as the training cases.
validation_labels = classification_labels[classification_labels["split"] == "validation"]
validation_entries = create_entries(imagesTr_dir, labelsTr_dir, validation_labels)

# Create test entries. os.listdir returns names in arbitrary order, so sort
# them to make the generated file reproducible across runs and machines.
test_entries = [
    os.path.join(imagesTs_dir, file)
    for file in sorted(os.listdir(imagesTs_dir))
    if file.endswith(".nii.gz")
]

# Create dataset.json structure
# NOTE(review): "name" says Task001_Pancreas while the folder is
# Dataset001_Pancreas -- confirm which identifier downstream tooling expects.
dataset = {
    "name": "Task001_Pancreas",
    "description": "Multi-task dataset for segmentation and classification",
    "tensorImageSize": "3D",
    "reference": "Provided dataset",
    "licence": "CC-BY-SA",
    "release": "0.1",
    "channel_names": {
        "0": "CT"
    },
    "labels": {
        "background": 0,
        "pancreas": 1,
        "lesion": 2
    },
    # Counts both splits, since both are stored under imagesTr.
    "numTraining": len(training_entries) + len(validation_entries),
    "file_ending": ".nii.gz",
    "training": training_entries,
    # Previously the validation entries were counted in numTraining but never
    # written out; emit them under their own key so the count is accounted for.
    "validation": validation_entries,
    "test": test_entries
}

# Save dataset.json
output_path = os.path.join(base_dir, "dataset.json")
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(dataset, f, indent=4)
print(f"dataset.json saved to {output_path}")


dataset.json saved to C:\Users\Admin\nnUNet\nnUNet_raw\Dataset001_Pancreas\dataset.json
