In [2]:
import numpy as np
from batchgenerators.utilities.file_and_folder_operations import *
from nnunet.dataset_conversion.utils import generate_dataset_json
from nnunet.paths import nnUNet_raw_data, preprocessing_output_dir
from nnunet.utilities.file_conversions import convert_2d_image_to_nifti

ModuleNotFoundError: No module named 'batchgenerators'

In [2]:
from skimage import io
def convert_2d_image_to_nifti(input_filename: str, output_filename_truncated: str, spacing=(999, 1, 1),
                              transform=None, is_seg: bool = False) -> None:
    """
    Reads an image (must be a format that it recognized by skimage.io.imread) and converts it into a series of niftis.
    The image can have an arbitrary number of input channels which will be exported separately (_0000.nii.gz,
    _0001.nii.gz, etc for images and only .nii.gz for seg).
    Spacing can be ignored most of the time.
    !!!2D images are often natural images which do not have a voxel spacing that could be used for resampling. These images
    must be resampled by you prior to converting them to nifti!!!
    Datasets converted with this utility can only be used with the 2d U-Net configuration of nnU-Net
    If Transform is not None it will be applied to the image after loading.
    Segmentations will be converted to np.uint32!
    :param is_seg:
    :param transform:
    :param input_filename:
    :param output_filename_truncated: do not use a file ending for this one! Example: output_name='./converted/image1'. This
    function will add the suffix (_0000) and file ending (.nii.gz) for you.
    :param spacing:
    :return:
    """
    img = io.imread(input_filename)

    if transform is not None:
        img = transform(img)

    if len(img.shape) == 2:  # 2d image with no color channels
        img = img[None, None]  # add dimensions
    else:
        assert len(img.shape) == 3, "image should be 3d with color channel last but has shape %s" % str(img.shape)
        # we assume that the color channel is the last dimension. Transpose it to be in first
        img = img.transpose((2, 0, 1))
        # add third dimension
        img = img[:, None]

    # image is now (c, x, x, z) where x=1 since it's 2d
    if is_seg:
        assert img.shape[0] == 1, 'segmentations can only have one color channel, not sure what happened here'

    for j, i in enumerate(img):

        if is_seg:
            i = i.astype(np.uint32)

        itk_img = sitk.GetImageFromArray(i)
        itk_img.SetSpacing(list(spacing)[::-1])
        if not is_seg:
            sitk.WriteImage(itk_img, output_filename_truncated + "_%04.0d.nii.gz" % j)
        else:
            sitk.WriteImage(itk_img, output_filename_truncated + ".nii.gz")

In [5]:
import numpy as np
from batchgenerators.utilities.file_and_folder_operations import *
from nnunet.dataset_conversion.utils import generate_dataset_json
from nnunet.paths import nnUNet_raw_data, preprocessing_output_dir
from nnunet.utilities.file_conversions import convert_2d_image_to_nifti


base = '/mnt/netcache/diag/silvan/Projects/CardiacOCT/nnunet_data5'
# this folder should have the training and testing subfolders
print(nnUNet_raw_data)
# now start the conversion to nnU-Net:
task_name = 'Task101_CardiacOCT'
target_base = join('/mnt/netcache/diag/silvan/Projects/CardiacOCT_nnunet','nnUNet_raw_data', task_name)
target_imagesTr = join(target_base, "imagesTr")
target_imagesTs = join(target_base, "imagesTs")
target_labelsTs = join(target_base, "labelsTs")
target_labelsTr = join(target_base, "labelsTr")

maybe_mkdir_p(target_imagesTr)
maybe_mkdir_p(target_labelsTs)
maybe_mkdir_p(target_imagesTs)
maybe_mkdir_p(target_labelsTr)

# convert the training examples. Not all training images have labels, so we just take the cases for which there are
# labels
labels_dir_tr = join(base, 'labelsTr')
images_dir_tr = join(base, 'imagesTr')
training_cases = subfiles(labels_dir_tr, suffix='.mha', join=False)
for t in training_cases:
    unique_name = t[:-4]  # just the filename with the extension cropped away, so img-2.png becomes img-2 as unique_name
    input_segmentation_file = join(labels_dir_tr, t)
    input_image_file = join(images_dir_tr, t)

    output_image_file = join(target_imagesTr, unique_name)  # do not specify a file ending! This will be done for you
    output_seg_file = join(target_labelsTr, unique_name)  # do not specify a file ending! This will be done for you

    # this utility will convert 2d images that can be read by skimage.io.imread to nifti. You don't need to do anything.
    # if this throws an error for your images, please just look at the code for this function and adapt it to your needs
    convert_2d_image_to_nifti(input_image_file, output_image_file, is_seg=False)

    # the labels are stored as 0: background, 255: road. We need to convert the 255 to 1 because nnU-Net expects
    # the labels to be consecutive integers. This can be achieved with setting a transform
    convert_2d_image_to_nifti(input_segmentation_file, output_seg_file, is_seg=True)

# # now do the same for the test set
# labels_dir_ts = join(base, 'labelsTs')
# images_dir_ts = join(base, 'imagesTs')
# testing_cases = subfiles(labels_dir_ts, suffix='.mha', join=False)
# for ts in testing_cases:
#     unique_name = ts[:-4]
#     input_segmentation_file = join(labels_dir_ts, ts)
#     input_image_file = join(images_dir_ts, ts)

#     output_image_file = join(target_imagesTs, unique_name)
#     output_seg_file = join(target_labelsTs, unique_name)

#     convert_2d_image_to_nifti(input_image_file, output_image_file, is_seg=False)
#     convert_2d_image_to_nifti(input_segmentation_file, output_seg_file, is_seg=True)

# finally we can call the utility for generating a dataset.json
generate_dataset_json(join(target_base, 'dataset.json'), target_imagesTr, target_imagesTs, ('Red', 'Green', 'Blue'),
                      labels={0: 'background', 1: 'lumen', 2: 'guidewire', 3: 'wall', 4: 'lipid', 5: 'calcium', 6: 'media', 7: 'catheter', 8: 'sidebranch', 9: 'rthrombus', 10: 'wthrombus', 11: 'dissection', 12: 'rupture'}, dataset_name=task_name)

"""
once this is completed, you can use the dataset like any other nnU-Net dataset. Note that since this is a 2D
dataset there is no need to run preprocessing for 3D U-Nets. You should therefore run the 
`nnUNet_plan_and_preprocess` command like this:

> nnUNet_plan_and_preprocess -t 120 -pl3d None

once that is completed, you can run the trainings as follows:
> nnUNet_train 2d nnUNetTrainerV2 120 FOLD

(where fold is again 0, 1, 2, 3 and 4 - 5-fold cross validation)

there is no need to run nnUNet_find_best_configuration because there is only one model to choose from.
Note that without running nnUNet_find_best_configuration, nnU-Net will not have determined a postprocessing
for the whole cross-validation. Spoiler: it will determine not to run postprocessing anyways. If you are using
a different 2D dataset, you can make nnU-Net determine the postprocessing by using the
`nnUNet_determine_postprocessing` command
"""

None


'\nonce this is completed, you can use the dataset like any other nnU-Net dataset. Note that since this is a 2D\ndataset there is no need to run preprocessing for 3D U-Nets. You should therefore run the \n`nnUNet_plan_and_preprocess` command like this:\n\n> nnUNet_plan_and_preprocess -t 120 -pl3d None\n\nonce that is completed, you can run the trainings as follows:\n> nnUNet_train 2d nnUNetTrainerV2 120 FOLD\n\n(where fold is again 0, 1, 2, 3 and 4 - 5-fold cross validation)\n\nthere is no need to run nnUNet_find_best_configuration because there is only one model to choose from.\nNote that without running nnUNet_find_best_configuration, nnU-Net will not have determined a postprocessing\nfor the whole cross-validation. Spoiler: it will determine not to run postprocessing anyways. If you are using\na different 2D dataset, you can make nnU-Net determine the postprocessing by using the\n`nnUNet_determine_postprocessing` command\n'

In [18]:
# Make custom train/validation split
import pandas as pd
import os
base = '/mnt/netcache/diag/silvan/Projects/CardiacOCT/nnunet_data5/'

task_name = 'Task101_CardiacOCT/'
target_base = '/mnt/netcache/diag/silvan/Projects/CardiacOCT_nnunet/nnUNet_raw_data/'+ task_name
labelsTr = target_base + "labelsTr"


df = pd.read_csv(base + 'Randomizatie_first_analysis.csv')

training_patients = []

IDs = os.listdir(labelsTr)

for row in df.itertuples():
    patient_ID = row.Site
    set_ID = row.Arm
    if set_ID == 'Training':
        training_patients.append(patient_ID)

training_patients = np.array(training_patients)
training_patients = training_patients[np.random.permutation(len(training_patients))]

train_split = training_patients[:58]
val_split = training_patients[58:]

train_IDs = []
val_IDs = []

for frame_ID in IDs:
    if np.isin(frame_ID.split('_')[0], train_split):
        train_IDs.append(frame_ID[:-7])
    elif np.isin(frame_ID.split('_')[0], val_split):
        val_IDs.append(frame_ID[:-7])

print(train_IDs)

['NLD-RADB-0097_frame_241_1', 'NLD-AMPH-0012_frame_521_2', 'NLD-AMPH-0072_frame_161_2', 'NLD-AMPH-0005_frame_481_0', 'NLD-ISALA-0085_frame_241_2', 'NLD-AMPH-0045-RCA_frame_437_1', 'NLD-AMPH-0063_frame_298_1', 'NLD-AMPH-0005_frame_121_2', 'NLD-ISALA-0058_frame_1_1', 'NLD-ISALA-0058_frame_289_1', 'NLD-AMPH-0013_frame_170_2', 'NLD-RADB-0097_frame_201_3', 'EST-NEMC-0027-RCx_frame_15_1', 'NLD-AMPH-0072_frame_121_0', 'NLD-ISALA-0076_frame_521_2', 'NLD-ISALA-0092_frame_1_2', 'NLD-RADB-0084-MO2_frame_41_0', 'NLD-ISALA-0058_frame_295_2', 'NLD-ISALA-0085_frame_201_0', 'NLD-ISALA-0087_frame_340_1', 'NLD-ISALA-0095-LAD_frame_81_0', 'NLD-AMPH-0005_frame_161_0', 'NLD-AMPH-0045-RCA_frame_81_3', 'NLD-RADB-0085_frame_521_2', 'NLD-AMPH-0072_frame_270_1', 'NLD-AMPH-0029_frame_81_2', 'NLD-AMPH-0007_frame_61_3', 'NLD-ISALA-0079_frame_481_2', 'NLD-ISALA-0076_frame_441_3', 'NLD-HMC-0009-RCAp_frame_401_3', 'NLD-AMPH-0017-LAD_frame_465_1', 'NLD-RADB-0097_frame_361_2', 'NLD-ISALA-0081-LAD_frame_121_3', 'NLD-RAD

In [19]:
len(train_IDs)

3100

In [23]:
print(val_IDs)

['NLD-RADB-0096_frame_265_3', 'NLD-ISALA-0097_frame_441_0', 'NLD-ISALA-0097_frame_401_2', 'NLD-ISALA-0097_frame_321_0', 'NLD-ISALA-0097_frame_81_2', 'NLD-ISALA-0097_frame_361_2', 'NLD-ISALA-0097_frame_201_3', 'NLD-ISALA-0097_frame_241_1', 'NLD-ISALA-0008_frame_81_3', 'NLD-HMC-0009-RCAd_frame_521_2', 'NLD-RADB-0095_frame_121_0', 'NLD-HMC-0009-RCAd_frame_441_3', 'NLD-RADB-0095_frame_481_2', 'NLD-RADB-0095_frame_161_2', 'NLD-HMC-0009-RCAd_frame_401_1', 'NLD-HMC-0009-RCAd_frame_321_3', 'NLD-HMC-0009-RCAd_frame_361_1', 'NLD-HMC-0009-RCAd_frame_201_0', 'NLD-HMC-0009-RCAd_frame_241_2', 'NLD-RADB-0095_frame_292_0', 'NLD-RADB-0095_frame_281_3', 'NLD-RADB-0096_frame_441_3', 'NLD-ISALA-0097_frame_461_0', 'NLD-RADB-0096_frame_401_1', 'NLD-RADB-0096_frame_81_3', 'NLD-RADB-0096_frame_521_2', 'NLD-RADB-0096_frame_201_0', 'NLD-HMC-0008_frame_41_0', 'NLD-RADB-0095_frame_338_2', 'NLD-RADB-0096_frame_241_2', 'NLD-RADB-0096_frame_321_3', 'NLD-HMC-0009-RCAd_frame_41_0', 'NLD-ISALA-0008_frame_1_2', 'NLD-RAD

In [22]:
val_split

array(['NLD-HMC-0009-RCAd', 'NLD-HMC-0008', 'NLD-RADB-0095',
       'NLD-RADB-0096', 'NLD-ISALA-0008', 'NLD-ISALA-0097'], dtype='<U18')