In [1]:
import os
import SimpleITK as sitk
import numpy as np
import matplotlib.pyplot as plt
import nibabel as nib
import shutil
from typing import Tuple
from batchgenerators.utilities.file_and_folder_operations import save_json, join

In [2]:
# Folder whose entries are listed (sorted) by get_train_set / get_test_set and used as patient names.
data_dir = "/vol/biomedic3/bglocker/radiotherapy/kat100/nifti"
# Root folder under which create_dataset creates the "<task_name>" dataset folder.
# NOTE(review): a later cell rebinds this name to the task folder itself.
target_base = "/vol/bitbucket/djk18/nnUnet_models/nnUNet_raw_data"



# Dataset 502_ctv also exists: it has the same classes as 503, but with regions
# on the bowels (not on the femoral heads).
def setup_dataset503():
    """Select Dataset503 (standard multiclass) by setting the `task_name` and `masks` globals."""
    global task_name
    global masks
    task_name, masks = (
        "Dataset503",
        [
            "Bones",
            "FemoralHead_L",
            "FemoralHead_R",
            "Bladder",
            "Anorectum",
            "Bowel-bag",
            "Bowel-loops",
            "CTVp",
        ],
    )

# Not used

# def setup_dataset504():
#     # regions 
#     global task_name, masks 
#     task_name = "Dataset504"
#     masks = ["Bones", "FemoralHead_L", "FemoralHead_R", "Bladder", "Anorectum", "Bowel-bag", "Bowel-loops", "CTVp"]


def setup_dataset505():
    """Select Dataset505 (standard multiclass, experiment 1 in the report).

    Sets the `task_name` and `masks` globals; left and right femoral heads are
    represented by the single "FemoralHead" entry.
    """
    global task_name
    global masks
    task_name, masks = (
        "Dataset505",
        [
            "Bones",
            "FemoralHead",
            "Bladder",
            "Anorectum",
            "Bowel-bag",
            "Bowel-loops",
            "CTVp",
        ],
    )

def setup_dataset506():
    """Select Dataset506 (region-based variant, experiment 1 in the report).

    Sets the `task_name` and `masks` globals; the mask list is the same as for
    Dataset505.
    """
    global task_name
    global masks
    task_name, masks = (
        "Dataset506",
        [
            "Bones",
            "FemoralHead",
            "Bladder",
            "Anorectum",
            "Bowel-bag",
            "Bowel-loops",
            "CTVp",
        ],
    )


def setup_dataset507():
    """Select Dataset507 (experiment 2, the ensemble) by setting the `task_name` and `masks` globals."""
    global task_name
    global masks
    task_name, masks = "Dataset507", ["CTVn", "CTVp", "Anorectum"]


# Active configuration for the rest of the notebook.
setup_dataset507()

In [3]:
def get_train_set(source_dir=None, num_train=80):
    """Return the names of the training cases.

    The entries of the source folder are sorted by name and the first
    `num_train` of them are returned.

    Args:
        source_dir: Folder to list. Defaults to the notebook-level `data_dir`.
        num_train: Number of leading (sorted) entries that form the training
            split. Defaults to 80.

    Returns:
        List of entry names in sorted order.
    """
    if source_dir is None:
        source_dir = data_dir
    sample_names = sorted(os.listdir(source_dir))
    return sample_names[:num_train]

def get_test_set(source_dir=None, num_train=80):
    """Return the names of the test cases.

    The entries of the source folder are sorted by name and everything after
    the first `num_train` entries is returned.

    Args:
        source_dir: Folder to list. Defaults to the notebook-level `data_dir`.
        num_train: Number of leading (sorted) entries reserved for training.
            Defaults to 80.

    Returns:
        List of entry names in sorted order (empty if the folder holds
        `num_train` entries or fewer).
    """
    if source_dir is None:
        source_dir = data_dir
    sample_names = sorted(os.listdir(source_dir))
    return sample_names[num_train:]



In [4]:
def create_folder_structure(task_path):
    """Make sure `task_path` and its imagesTr / imagesTs / labelsTr sub-folders exist.

    Each sub-folder is created individually, so a task folder that already
    exists but is missing some sub-folders is completed rather than left
    untouched. Calling the function again is a no-op.

    Args:
        task_path: Path of the dataset folder to create or complete.
    """
    is_new = not os.path.exists(task_path)
    for sub_folder in ("imagesTr", "imagesTs", "labelsTr"):
        # makedirs also creates task_path itself when it is missing
        os.makedirs(os.path.join(task_path, sub_folder), exist_ok=True)
    if is_new:
        print("The new directory is created!")

In [5]:
# Create NN-Unet data set
def _mask_files_for(structure):
    """Return the mask file names that contribute to one entry of `masks`."""
    if structure == "FemoralHead":
        # Merged class: the left and the right femoral head share one label.
        return ["mask_FemoralHead_L.nii.gz", "mask_FemoralHead_R.nii.gz"]
    return ["mask_" + structure + ".nii.gz"]


def _build_label_map(patient_path, structures):
    """Merge the per-structure masks of one patient into a single label map.

    Returns `(label_map, header)`, or `(None, None)` if no mask could be loaded.
    """
    label_map = None
    header = None
    for label_value, structure in enumerate(structures, start=1):
        for file_name in _mask_files_for(structure):
            mask_path = os.path.join(patient_path, file_name)
            try:
                mask_img = nib.load(mask_path)
                mask_array = np.asarray(mask_img.dataobj)
                if label_map is None:
                    # Take shape and header from the first mask that actually
                    # loads, not necessarily from the first entry of the list.
                    header = mask_img.header.copy()
                    label_map = np.zeros(mask_array.shape, dtype=np.dtype('u1'))
                # Assumes no overlapping labels, i.e. labels are set in the order
                # of the mask list. Place substructures last in this list so
                # they are not overwritten.
                label_map[mask_array != 0] = label_value
            except Exception as e:
                # Best effort: a mask that cannot be used is reported and skipped.
                print(f"{mask_path}: {e}")
    return label_map, header


def create_dataset(input_data):
    """Build the nnU-Net raw-data folder for the currently selected task.

    For every training case the masks named in the global `masks` list are
    merged into one label map: non-zero voxels of the mask at position i get
    the value i + 1, everything else stays 0. The "FemoralHead" entry combines
    the left and right femoral-head masks under one label. The label map is
    saved to labelsTr and the case's image.nii.gz is copied to imagesTr with
    the "_0000" suffix. Test cases only have their image copied to imagesTs.

    Training cases for which no mask at all can be loaded are reported and
    skipped, so that imagesTr and labelsTr stay in sync.

    Args:
        input_data: Folder containing one sub-folder per case, each holding
            image.nii.gz and the mask_<name>.nii.gz files.
    """
    dataset_dir = os.path.join(target_base, task_name)
    create_folder_structure(dataset_dir)

    for patient_name in get_train_set():
        print(patient_name)
        patient_path = os.path.join(input_data, patient_name)
        image_path = os.path.join(patient_path, "image.nii.gz")

        label_map, header = _build_label_map(patient_path, masks)
        if label_map is None:
            print(f"Skipping {patient_name}: no mask could be loaded")
            continue

        # When a header is passed in, nibabel keeps that header's on-disk
        # dtype, so make it match the uint8 label array explicitly.
        header.set_data_dtype(label_map.dtype)
        ni_img = nib.Nifti1Image(label_map, None, header=header)

        shutil.copyfile(image_path, os.path.join(dataset_dir, "imagesTr", patient_name + "_0000.nii.gz"))
        nib.save(ni_img, os.path.join(dataset_dir, "labelsTr", patient_name + ".nii.gz"))

    for patient_name in get_test_set():
        print(patient_name)
        image_path = os.path.join(input_data, patient_name, "image.nii.gz")
        shutil.copyfile(image_path, os.path.join(dataset_dir, "imagesTs", patient_name + "_0000.nii.gz"))

# Build the dataset from a different input folder than data_dir. The case names are
# still taken from the data_dir listing (get_train_set / get_test_set), so both folders
# must contain the same case sub-folders.
# NOTE(review): the path name suggests a 2 mm isotropic downsampled copy — confirm.
#create_dataset(data_dir)
create_dataset("/vol/bitbucket/djk18/dataset/downsample_2m_isotropic")

The new directory is created!


ValueError: 'FemoralHead' is not in list

In [8]:
# generate_dataset_json function from https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunetv2/dataset_conversion/generate_dataset_json.py
def generate_dataset_json(output_folder: str,
                          channel_names: dict,
                          labels: dict,
                          num_training_cases: int,
                          file_ending: str,
                          regions_class_order: Tuple[int, ...] = None,
                          dataset_name: str = None, reference: str = None, release: str = None, license: str = None,
                          description: str = None,
                          overwrite_image_reader_writer: str = None, **kwargs):
    """
    Generates a dataset.json file in the output folder.

    The `channel_names` and `labels` dictionaries passed in are not modified;
    normalized copies are written to the file.

    channel_names:
        Channel names must map the index to the name of the channel, example:
        {
            0: 'T1',
            1: 'CT'
        }
        Keys are converted to strings in the output.
        Note that the channel names may influence the normalization scheme!! Learn more in the documentation.
    labels:
        This will tell nnU-Net what labels to expect. Important: This will also determine whether you use region-based training or not.
        Example regular labels:
        {
            'background': 0,
            'left atrium': 1,
            'some other label': 2
        }
        Example region-based training:
        {
            'background': 0,
            'whole tumor': (1, 2, 3),
            'tumor core': (2, 3),
            'enhancing tumor': 3
        }
        Values are converted to int (or to tuples of int for regions) in the output.
        Remember that nnU-Net expects consecutive values for labels! nnU-Net also expects 0 to be background!
    num_training_cases: is used to double check all cases are there!
    file_ending: needed for finding the files correctly. IMPORTANT! File endings must match between images and
    segmentations!
    regions_class_order: required (asserted) as soon as any label value is a tuple/list with more than one entry.
    dataset_name, reference, release, license, description: self-explanatory and not used by nnU-Net. Just for
    completeness and as a reminder that these would be great! Note that `license` is stored under the key 'licence'.
    overwrite_image_reader_writer: If you need a special IO class for your dataset you can derive it from
    BaseReaderWriter, place it into nnunet.imageio and reference it here by name
    kwargs: whatever you put here will be placed in the dataset.json as well
    """
    has_regions: bool = any(isinstance(v, (tuple, list)) and len(v) > 1 for v in labels.values())
    if has_regions:
        assert regions_class_order is not None, f"You have defined regions but regions_class_order is not set. " \
                                                f"You need that."

    # channel names need strings as keys; build a copy instead of editing the caller's dict
    channel_names_out = {str(key): name for key, name in channel_names.items()}

    # labels need ints (or tuples of ints) as values; again work on a copy
    labels_out = {
        name: tuple(int(i) for i in value) if isinstance(value, (tuple, list)) else int(value)
        for name, value in labels.items()
    }

    dataset_json = {
        'channel_names': channel_names_out,
        'labels': labels_out,
        'numTraining': num_training_cases,
        'file_ending': file_ending,
    }

    # optional entries are only written when they were actually provided
    optional_entries = (
        ('name', dataset_name),
        ('reference', reference),
        ('release', release),
        ('licence', license),
        ('description', description),
        ('overwrite_image_reader_writer', overwrite_image_reader_writer),
        ('regions_class_order', regions_class_order),
    )
    for key, value in optional_entries:
        if value is not None:
            dataset_json[key] = value

    dataset_json.update(kwargs)

    save_json(dataset_json, join(output_folder, 'dataset.json'), sort_keys=False)

In [10]:

# Folder of the selected task and its nnU-Net sub-folders.
# NOTE(review): this rebinds `target_base`, which an earlier cell set to the raw-data
# root. create_dataset joins `task_name` onto `target_base` again, so re-running it
# after this cell would nest the task folder twice.
target_base = join("/vol/bitbucket/djk18/nnUnet_models/nnUNet_raw_data/", task_name)
target_imagesTr, target_imagesTs, target_labelsTs, target_labelsTr = (
    join(target_base, sub_folder)
    for sub_folder in ("imagesTr", "imagesTs", "labelsTs", "labelsTr")
)

# What is modality?
#generate_dataset_json_old(join(target_base, 'dataset.json'), target_imagesTr, target_imagesTs, ('M'),
#                          labels={0: 'background', 1: "Bones", 2: "FemoralHead_L", 3: "FemoralHead_R", 4: "Bladder", 5: "Anorectum", 6: "Bowel-bag", 7: "Bowel-loops", 8: "CTVp"}, dataset_name=task_name, license='Academic use')
#, dataset_name=task_name, license='Academic use'


def dataset503_json():
    """Call generate_dataset_json for Dataset503 (single CT channel).

    "Bowel-bag" is declared as the region (6, 7) and "Bowel-loops" as (7,);
    all other structures are plain integer labels.
    """
    # NOTE(review): 70 training cases here, while the other datasets use 80 and
    # get_train_set returns the first 80 entries — confirm which is intended.
    label_spec = {
        'background': 0,
        'Bones': 1,
        'FemoralHead_L': 2,
        'FemoralHead_R': 3,
        'Bladder': 4,
        "Anorectum": 5,
        "Bowel-bag": (6, 7),
        "Bowel-loops": (7,),
        "CTVp": 8,
    }
    generate_dataset_json(
        target_base,
        channel_names={0: 'CT'},
        labels=label_spec,
        num_training_cases=70,
        file_ending='.nii.gz',
        regions_class_order=tuple(range(1, 9)),
        dataset_name=task_name,
        license='MIT',
        dataset_release='1.0',
    )
     
def dataset504_json():
    """Call generate_dataset_json for Dataset504: eight plain integer labels, single CT channel."""
    structure_names = (
        'background',
        'Bones',
        'FemoralHead_L',
        'FemoralHead_R',
        'Bladder',
        "Anorectum",
        "Bowel-bag",
        "Bowel-loops",
        "CTVp",
    )
    # label value = position in the tuple above (background is 0)
    label_spec = {name: value for value, name in enumerate(structure_names)}
    generate_dataset_json(
        target_base,
        channel_names={0: 'CT'},
        labels=label_spec,
        num_training_cases=80,
        file_ending='.nii.gz',
        dataset_name=task_name,
        license='MIT',
        dataset_release='1.0',
    )

def dataset505_json():
    """Call generate_dataset_json for Dataset505: seven plain integer labels, single CT channel."""
    structure_names = (
        'background',
        'Bones',
        'FemoralHead',
        'Bladder',
        "Anorectum",
        "Bowel-bag",
        "Bowel-loops",
        "CTVp",
    )
    # label value = position in the tuple above (background is 0)
    label_spec = {name: value for value, name in enumerate(structure_names)}
    generate_dataset_json(
        target_base,
        channel_names={0: 'CT'},
        labels=label_spec,
        num_training_cases=80,
        file_ending='.nii.gz',
        dataset_name=task_name,
        license='MIT',
        dataset_release='1.0',
    )
    
def dataset506_json():
    """Call generate_dataset_json for Dataset506 (region-based labels, single CT channel).

    "Bones" is declared as the region (1, 2), i.e. it includes the
    "FemoralHead" label, and "Bowel-bag" as the region (5, 6), i.e. it
    includes the "Bowel-loops" label.
    """
    label_spec = {
        'background': 0,
        'Bones': (1, 2),
        'FemoralHead': 2,
        'Bladder': 3,
        "Anorectum": 4,
        "Bowel-bag": (5, 6),
        "Bowel-loops": 6,
        "CTVp": 7,
    }
    generate_dataset_json(
        target_base,
        channel_names={0: 'CT'},
        labels=label_spec,
        num_training_cases=80,
        file_ending='.nii.gz',
        regions_class_order=tuple(range(1, 8)),
        dataset_name=task_name,
        license='MIT',
        dataset_release='1.0',
    )

# Maps a task name to the function that writes its dataset.json.
# NOTE(review): there is no entry for "Dataset507", the task selected by the
# setup cell, so with that selection only the fallback message is printed.
function_map = dict(
    Dataset503=dataset503_json,
    Dataset504=dataset504_json,
    Dataset505=dataset505_json,
    Dataset506=dataset506_json,
)

if task_name not in function_map:
    print("Taskname not in function map")
else:
    print(task_name)
    function = function_map[task_name]
    function()

Dataset506
