# What is wrong with Continual Learning in Medical Image Segmentation - Preprocessing

## Installation and Setup

For basic setup of the Lifelong nnUNet framework, please refer to the [Github repository](https://github.com/MECLabTUDA/Lifelong-nnUNet). Please note that you need to switch to the branch `dev-task_agnostic_cl` before running the installation via pip.

## Data preparation

After setting up the framework, you need to prepare the datasets. To do so, download the datasets listed below and place them in the nnUNet_raw_data_base/nnUNet_raw_data folder.

* [Cardiac](https://www.ub.edu/mnms/)
* [Prostate](https://liuquande.github.io/SAML/)

To prepare the **cardiac** database you need to run the following script. You need to adjust the VENDOR variable (the M&Ms dataset comes with vendors A and B) and the TASK_NAME variable to the task name that you want to assign. In this tutorial we will use the tasks Task008_mHeartA and Task009_mHeartB for the vendors A and B, respectively. Furthermore, the INPUT_PATH of the input dataset and the BASE_PATH of your nnUNet_raw_data folder need to be specified.

In [None]:
import numpy as np
import SimpleITK as sitk
import pandas as pd
from pathlib import Path
import os
from nnunet.dataset_conversion.utils import generate_dataset_json


# --- User configuration ------------------------------------------------------
# Root folder of the downloaded M&Ms dataset. Keep the trailing slash: the
# paths in this cell are built by plain string concatenation.
INPUT_PATH = ".../m_m_challenge_dataset/"
# Vendor to convert; it is also appended to the task name below.
VENDOR = "A"

# nnUNet raw data folder. The trailing slash is required because every path in
# this cell is formed as BASE_PATH + TASK_NAME + "/..."; without it the task
# folder name would be glued onto "nnUNet_raw_data".
BASE_PATH = ".../nnUnet_raw_data_base/nnUNet_raw_data/"
TASK_NAME = "Task008_mHeart" + VENDOR


#create output folder structure
for _subfolder in ("imagesTr", "imagesTs", "labelsTr"):
    Path(BASE_PATH + TASK_NAME + "/" + _subfolder).mkdir(parents=True, exist_ok=True)

def copyMetaData(original, img):
    """Transfer spacing, origin and orientation from ``original`` onto ``img``.

    ``img`` is modified in place and nothing is returned. The direction of
    ``original`` is read as a flat sequence, and only the entries at indices
    0-2, 4-6 and 8-10 are forwarded. Presumably ``original`` is a 4D image
    whose direction is a row-major 4x4 matrix and ``img`` a 3D one, so this
    keeps the upper-left 3x3 part; confirm against the callers.
    """
    img.SetSpacing(original.GetSpacing())
    img.SetOrigin(original.GetOrigin())

    source_direction = original.GetDirection()
    # Indices 3, 7 and 11 are skipped; nothing from index 11 onwards is used.
    kept_indices = (0, 1, 2, 4, 5, 6, 8, 9, 10)
    img.SetDirection(tuple(source_direction[i] for i in kept_indices))
    

def extractTimeSlice(original, timeSlice):
    """Return entry ``timeSlice`` of ``original`` as a new float32 image.

    The voxel data of ``original`` is converted to a float32 array, indexed
    along its first axis with ``timeSlice`` and wrapped in a new image.
    Spacing and origin are taken over from ``original``; of its direction
    only the entries at indices 0-2, 4-6 and 8-10 are forwarded.
    Presumably ``original`` is a 4D (time-resolved) volume and the result a
    3D one; confirm against the input data.
    """
    frames = sitk.GetArrayFromImage(original).astype(np.float32)
    frame_image = sitk.GetImageFromArray(frames[timeSlice])

    frame_image.SetSpacing(original.GetSpacing())
    frame_image.SetOrigin(original.GetOrigin())

    source_direction = original.GetDirection()
    # Indices 3, 7 and 11 are skipped; nothing from index 11 onwards is used.
    kept_indices = (0, 1, 2, 4, 5, 6, 8, 9, 10)
    frame_image.SetDirection(tuple(source_direction[i] for i in kept_indices))
    return frame_image
    

def processLabeledImage(inputPath, inName, outName, timeSlice):
    """Write one time frame of a labeled case into the task's imagesTr folder.

    Reads ``inputPath + inName + "_sa.nii.gz"``, extracts frame ``timeSlice``
    with ``extractTimeSlice`` and stores it as ``outName + ".nii.gz"``. The
    destination is built from the module-level ``BASE_PATH`` and
    ``TASK_NAME``.
    """
    source_file = inputPath + inName + "_sa.nii.gz"
    target_file = BASE_PATH + TASK_NAME + "/imagesTr/" + outName + ".nii.gz"
    frame = extractTimeSlice(sitk.ReadImage(source_file), timeSlice)
    sitk.WriteImage(frame, target_file)
def processUnlabeledImage(inputPath, inName, outName, timeSlice):
    """Write one time frame of an unlabeled case into the task's imagesTs folder.

    Reads ``inputPath + inName + "_sa.nii.gz"``, extracts frame ``timeSlice``
    with ``extractTimeSlice`` and stores it as ``outName + ".nii.gz"``. The
    destination is built from the module-level ``BASE_PATH`` and
    ``TASK_NAME``.
    """
    source_file = inputPath + inName + "_sa.nii.gz"
    target_file = BASE_PATH + TASK_NAME + "/imagesTs/" + outName + ".nii.gz"
    frame = extractTimeSlice(sitk.ReadImage(source_file), timeSlice)
    sitk.WriteImage(frame, target_file)
def processSegmentationImage(inputPath, inName, outName, timeSlice):
    """Write one time frame of a ground-truth file into the task's labelsTr folder.

    Reads ``inputPath + inName + "_sa_gt.nii.gz"``, converts its data to a
    float32 array, selects entry ``timeSlice`` along the first axis, copies
    the metadata over with ``copyMetaData`` and stores the result as
    ``outName + ".nii.gz"``. The destination is built from the module-level
    ``BASE_PATH`` and ``TASK_NAME``.
    """
    ground_truth = sitk.ReadImage(inputPath + inName + "_sa_gt.nii.gz")
    frame = sitk.GetArrayFromImage(ground_truth).astype(np.float32)[timeSlice]
    label_image = sitk.GetImageFromArray(frame)

    copyMetaData(ground_truth, label_image)
    target_file = BASE_PATH + TASK_NAME + "/labelsTr/" + outName + ".nii.gz"
    sitk.WriteImage(label_image, target_file)



# Load the dataset information table and list the cases available on disk.
csv = pd.read_csv(INPUT_PATH + "M&Ms Dataset Information.csv", sep='\t')
labeledData = os.listdir(INPUT_PATH + "Training-corrected/Labeled")
unlabeledData = os.listdir(INPUT_PATH + "Training-corrected/Unlabeled")

# Only the rows of the selected vendor are converted.
csv = csv[csv.Vendor == VENDOR]

print(csv)

# Each row provides the case id ("External code") and the frame index stored
# in the "ED" column, which is the only frame that gets exported.
for inName, timeSlice in zip(csv["External code"], csv["ED"]):
    outName = inName + "_0000" #index for modality: 0
    if inName in labeledData:
        print(inName, "labeled")
        processLabeledImage(INPUT_PATH + "Training-corrected/Labeled/" + inName + "/", inName, outName, timeSlice)
        processSegmentationImage(INPUT_PATH + "Training-corrected/Labeled/" + inName + "/", inName, inName, timeSlice)
    elif inName in unlabeledData:
        print(inName, "unlabeled")
        processUnlabeledImage(INPUT_PATH + "Training-corrected/Unlabeled/" + inName + "/", inName, outName, timeSlice)
    else:
        # The table may list cases that are in neither training folder.
        # Skip them with a notice instead of failing inside sitk.ReadImage
        # on a path that does not exist.
        print(inName, "not found in Training-corrected, skipped")


print("finished")


# Describe the converted task for nnUNet (single MRI modality, three
# foreground labels).
generate_dataset_json(BASE_PATH + TASK_NAME + "/dataset.json",
    BASE_PATH + TASK_NAME + "/imagesTr",
    BASE_PATH + TASK_NAME + "/imagesTs",
    ('MRI',),
    {0: 'background', 1: 'LV', 2: 'MYO', 3: 'RV' },
    TASK_NAME,
    dataset_description="VENDOR " + VENDOR)



To prepare the prostate database you need to run the following script. Make sure to adapt the INPUT_PATH and BASE_PATH variables to your setup. By default this script will produce tasks with ids 10 to 15. In case you want different ids assigned to the prostate datasets, you can change the TASK_NO variable, which defines the start id.


In [None]:
import numpy as np
import SimpleITK as sitk
import pandas as pd
from pathlib import Path
import os
from nnunet.dataset_conversion.utils import generate_dataset_json
import pathlib


def copyMetaData(original, img):
    """Transfer spacing, origin and orientation from ``original`` onto ``img``.

    ``img`` is modified in place and nothing is returned. The direction of
    ``original`` is read as a flat sequence, and its first nine entries
    (indices 0-8) are forwarded.
    """
    img.SetSpacing(original.GetSpacing())
    img.SetOrigin(original.GetOrigin())

    source_direction = original.GetDirection()
    img.SetDirection(tuple(source_direction[i] for i in range(9)))


def processLabeledImage(inputPath, inName, outName):
    """Copy a labeled case's image into the task's imagesTr folder.

    Reads ``inputPath + inName + ".nii.gz"`` and writes it unchanged as
    ``outName + ".nii.gz"``. The destination is built from the module-level
    ``BASE_PATH`` and ``TASK_NAME``.
    """
    original = sitk.ReadImage(inputPath + inName + ".nii.gz")
    # Write the image that was just read. The previous code passed the
    # undefined name `img`, which raised a NameError on every call.
    sitk.WriteImage(original, BASE_PATH + TASK_NAME + "/imagesTr/" + outName + ".nii.gz")
def processUnlabeledImage(inputPath, inName, outName):
    """Copy an unlabeled case's image into the task's imagesTs folder.

    Reads ``inputPath + inName + ".nii.gz"`` and writes it unchanged as
    ``outName + ".nii.gz"``. The destination is built from the module-level
    ``BASE_PATH`` and ``TASK_NAME``.
    """
    original = sitk.ReadImage(inputPath + inName + ".nii.gz")
    # Write the image that was just read. The previous code passed the
    # undefined name `img`, which raised a NameError on every call.
    sitk.WriteImage(original, BASE_PATH + TASK_NAME + "/imagesTs/" + outName + ".nii.gz")
def processSegmentationImage(inputPath, inName, outName):
    """Re-save a segmentation as float32 in the task's labelsTr folder.

    Reads ``inputPath + inName + ".nii.gz"``, converts its data to a float32
    array, wraps it in a new image, copies the metadata over with
    ``copyMetaData`` and stores the result as ``outName + ".nii.gz"``. The
    destination is built from the module-level ``BASE_PATH`` and
    ``TASK_NAME``.
    """
    source = sitk.ReadImage(inputPath + inName + ".nii.gz")
    voxels = sitk.GetArrayFromImage(source).astype(np.float32)
    label_image = sitk.GetImageFromArray(voxels)

    copyMetaData(source, label_image)
    target_file = BASE_PATH + TASK_NAME + "/labelsTr/" + outName + ".nii.gz"
    sitk.WriteImage(label_image, target_file)


def generate_dataset_json_method(BASE_PATH, TASK_NAME, VENDOR):
    """Create the dataset.json of one prostate task.

    Calls ``generate_dataset_json`` for the task folder
    ``BASE_PATH + TASK_NAME`` with a single 'T2 MRI' modality, the labels
    background/foreground and the description ``"VENDOR " + VENDOR``.
    """
    task_folder = BASE_PATH + TASK_NAME
    modalities = ('T2 MRI',)
    labels = {0: 'background', 1: 'foreground'}
    generate_dataset_json(
        task_folder + "/dataset.json",
        task_folder + "/imagesTr",
        task_folder + "/imagesTs",
        modalities,
        labels,
        TASK_NAME,
        dataset_description="VENDOR " + VENDOR,
    )



if __name__ == '__main__':

    # Folder that contains one sub-folder per prostate site.
    INPUT_PATH = ".../prostate_data"

    # nnUNet raw data folder; the trailing slash is required because paths
    # are built as BASE_PATH + TASK_NAME + "/...".
    BASE_PATH = ".../nnUnet_raw_data_base/nnUNet_raw_data/"
    # Id of the first task; each discovered folder gets the next id.
    TASK_NO = 10
    TASK_LIST = []
    # Folder name -> task name. Used to match files to their task without
    # parsing the task name apart again, which broke for names containing '-'.
    TASK_BY_SITE = {}

    # NOTE: ids are assigned in the order in which os.walk reports the
    # folders, so the id/site pairing follows that listing order.
    for path, dirs, files in os.walk(INPUT_PATH):
        for dirs_ in dirs:
            # Zero-pad the id to three digits. For ids 10-99 this yields the
            # same name as the former "Task0" + str(id), and it stays
            # well-formed for ids below 10 or above 99.
            TASK_NAME = "Task{:03d}_Prostate-{}".format(TASK_NO, dirs_)
            TASK_LIST.append(TASK_NAME)
            # Keep the first task registered for a folder name, like the
            # former first-match search over TASK_LIST did.
            TASK_BY_SITE.setdefault(dirs_, TASK_NAME)

            #create output folder structure
            for subfolder in ("/imagesTr", "/imagesTs", "/labelsTr"):
                Path(BASE_PATH + TASK_NAME + subfolder).mkdir(parents=True, exist_ok=True)

            TASK_NO = TASK_NO + 1

        if not files:
            continue

        # All files of this iteration sit directly in `path`, so the site
        # name is the same for each of them.
        dirs_ = pathlib.Path(path).resolve().name
        if dirs_ not in TASK_BY_SITE:
            # E.g. files lying directly in INPUT_PATH: there is no task for
            # them, so skip them instead of aborting with StopIteration.
            print("Skipping", len(files), "file(s) in", path, "- no task was created for this folder")
            continue
        TASK_NAME = TASK_BY_SITE[dirs_]

        for file in files:
            if "egmentation" in file: #"egmentation" since sometimes it's upper and lowercase and should be more efficient than to upper or to lower
                #labels
                original = sitk.ReadImage(os.path.join(path, file))
                arr = sitk.GetArrayFromImage(original).astype(np.float32)
                #arr[arr != 0] = 1
                original = sitk.GetImageFromArray(arr)
                # The matching image is expected next to the label file and
                # named after the part before the first "_". Its metadata is
                # copied onto the re-created label image.
                reference = sitk.ReadImage(os.path.join(path, file.split("_")[0] + ".nii.gz"))
                copyMetaData(reference, original)
                sitk.WriteImage(original, BASE_PATH + TASK_NAME + "/labelsTr/" + dirs_ + "_" + str(file.rpartition('_')[0]) + ".nii.gz")

            else: #images
                original = sitk.ReadImage(os.path.join(path, file))
                # Strip both extensions (".nii.gz") and append the modality
                # suffix "_0000".
                sitk.WriteImage(original, BASE_PATH + TASK_NAME + "/imagesTr/" + dirs_ + "_" + os.path.splitext(os.path.splitext(file)[0])[0] + "_0000" + ".nii.gz")

    for task in TASK_LIST:
        # Task names look like "TaskNNN_Prostate-<site>". Splitting at the
        # first '-' returns the full site name even if it contains hyphens.
        generate_dataset_json_method(BASE_PATH, task, task.partition('-')[2])
        
    


        


To create the joint dataset, you need to manually copy the data from the nnUnet_raw_data_base/nnUNet_raw_data folder to a common folder. 

For our cardiac data that means we copy the contents of Task008_mHeartA/imagesTr and Task009_mHeartB/imagesTr to Task031_Cardiac_joined/imagesTr. We proceed similarly with the labelsTr folders.
After that we create the dataset.json file by running this script. Do not forget to set BASE_PATH accordingly.


In [None]:
from nnunet.dataset_conversion.utils import generate_dataset_json

# nnUNet raw data folder, WITHOUT a trailing slash: every path below adds its
# own leading "/".
BASE_PATH = ".../nnUnet_raw_data_base/nnUNet_raw_data"

# Describe the merged cardiac task. The dataset.json path now carries the same
# "/" separator as the two folder arguments; previously the task folder name
# was glued directly onto BASE_PATH.
generate_dataset_json(BASE_PATH + "/Task031_Cardiac_joined/dataset.json",
    BASE_PATH + "/Task031_Cardiac_joined/imagesTr",
    BASE_PATH + "/Task031_Cardiac_joined/imagesTs",
    ('MRI',),
    {0: 'background', 1: 'LV', 2: 'MYO', 3: 'RV' },
    "Task031_Cardiac_joined",
    dataset_description="Merged Cardiac")

After that you can run the following basic commands to prepare the data for training 

In [None]:
# Run nnUNet planning and preprocessing once per task: the two vendor-specific
# cardiac tasks and the joint cardiac task.
# NOTE(review): upstream nnU-Net v1 documents -t as a list of integer task
# ids; confirm that this branch also accepts full task names.
!nnUNet_plan_and_preprocess -t Task008_mHeartA
!nnUNet_plan_and_preprocess -t Task009_mHeartB
!nnUNet_plan_and_preprocess -t Task031_Cardiac_joined

As a last step we will manually create the splits file, such that the joint database contains the same train/val split as the separate datasets. To do so, copy the splits_final.pkl files from the separate datasets to a common location, rename them to splits_final0.pkl and splits_final1.pkl, and run the script below.
It will produce a splits_final.pkl file that needs to be pasted into the nnUnet_preprocessed/Task031_Cardiac_joined folder.

In [None]:
import os
import pickle
import json
import numpy
from collections import OrderedDict


def load_pickle(file, mode='rb'):
    """Open ``file`` with ``mode`` and return the object unpickled from it.

    The file is closed again before returning, also when unpickling fails.
    Only use this on files you trust: unpickling can execute arbitrary code.
    """
    handle = open(file, mode)
    try:
        return pickle.load(handle)
    finally:
        handle.close()
    
def write_pickle(obj, file, mode='wb'):
    """Open ``file`` with ``mode`` and pickle ``obj`` into it.

    The file is closed again afterwards, also when pickling fails. Nothing
    is returned.
    """
    handle = open(file, mode)
    try:
        pickle.dump(obj, handle)
    finally:
        handle.close()

        
# Split files of the separate datasets, in the order they are merged.
split_files = ("splits_final0.pkl", "splits_final1.pkl")
list_of_splits = [load_pickle(name) for name in split_files]

final_split = []

# For each of the five folds, append the 'train' and 'val' entries of every
# dataset, one after the other, so the joint split keeps each dataset's own
# train/val assignment.
for fold in range(5):
    final_at_fold = OrderedDict()
    for subset in ('train', 'val'):
        merged = []
        for split in list_of_splits:
            merged = numpy.concatenate((merged, split[fold][subset]))
        final_at_fold[subset] = merged

    final_split.append(final_at_fold)


print(final_split)
write_pickle(final_split, "splits_final.pkl")