# Create data for Swin UNETR method

### Convert 3d pet and ct niftis to separate channels in 4d niftis
The MONAI framework reads multimodal NIfTI images as separate channels of a single 4D NIfTI file, so we need to convert our normalized raw data to 4D NIfTIs.

In [1]:
# Load packages
from MEDIcaTe.file_folder_ops import *
from MEDIcaTe.utilities import *
from multiprocessing import Pool
import json

In [2]:
# At the source the images are resampled to median size of the dataset and normalized.
# Images are saved as float32 and labels as INT8
#
# NOTE: everything below is wrapped in a triple-quoted string, so it is NOT
# executed. It is a disabled trial run of the PET/CT conversion: the loop
# breaks as soon as i > 0, i.e. only the first file returned by listdir()
# would be converted. The case name is the label file name minus its last
# 7 characters (presumably the ".nii.gz" extension).
'''
image_src_path = '/homes/kovacs/project_data/hnc-auto-contouring/inner-eye/d_train_norm/images'
label_src_path = '/homes/kovacs/project_data/hnc-auto-contouring/inner-eye/d_train_norm/labels'
image_dst_path = '/homes/kovacs/project_data/hnc-auto-contouring/MONAI/Task500_HNC01/imagesTr'

for i,file in enumerate(listdir(label_src_path)):
    if i>0:
        break
    case = file[:-7]
    ct_abs_path = join(image_src_path,f'{case}_0000.nii.gz')
    pet_abs_path = join(image_src_path,f'{case}_0001.nii.gz')
    pet_ct_dst_path = join(image_dst_path,file)
    convert_pet_ct_to_4d_nifti_channel_first(ct_abs_path, pet_abs_path, pet_ct_dst_path)
'''

In [None]:
# NOTE: this cell is wrapped in a triple-quoted string, so it is NOT executed.
# It is a disabled trial run of the label conversion: the loop breaks as soon
# as i > 0, i.e. only the first file returned by listdir() would be converted
# and written under the same file name in label_dst_path.
'''
label_src_path = '/homes/kovacs/project_data/hnc-auto-contouring/inner-eye/d_train_norm/labels'
label_dst_path = '/homes/kovacs/project_data/hnc-auto-contouring/MONAI/Task500_HNC01/labelsTr'

for i,file in enumerate(listdir(label_src_path)):
    if i>0:
        break
    label_abs_path_src = join(label_src_path,file)
    label_abs_path_dst = join(label_dst_path,file)
    convert_label_to_channel_first(label_abs_path_src, label_abs_path_dst)
'''

In [None]:
# At the source the images are resampled to median size of the dataset and normalized.
# Images are saved as float32 and labels as INT8
# image_src_path: folder holding the per-case '<case>_0000.nii.gz' (CT) and
#                 '<case>_0001.nii.gz' (PET) files read by conv_folder_to_4d.
# label_src_path: folder whose file listing defines which cases are converted.
# image_dst_path: folder the combined PET/CT files are written to, one per
#                 label file and under the label file's name.
image_src_path = '/homes/kovacs/project_data/hnc-auto-contouring/inner-eye/d_train_norm/images'
label_src_path = '/homes/kovacs/project_data/hnc-auto-contouring/inner-eye/d_train_norm/labels'
image_dst_path = '/homes/kovacs/project_data/hnc-auto-contouring/MONAI/Task500_HNC01/imagesTr'

def conv_folder_to_4d(label_file):
    """Convert the CT/PET pair belonging to one label file into a combined file.

    The case name is the label file name with its last 7 characters removed
    (presumably the ".nii.gz" extension). The CT is read from
    ``<case>_0000.nii.gz`` and the PET from ``<case>_0001.nii.gz`` inside the
    module-level ``image_src_path``; the result is written to the module-level
    ``image_dst_path`` under the label file's own name. The actual conversion
    is delegated to ``convert_pet_ct_to_4d_nifti_channel_first``.

    Args:
        label_file: File name (not a full path) of a label file.
    """
    stem = label_file[:-7]
    convert_pet_ct_to_4d_nifti_channel_first(
        join(image_src_path, f'{stem}_0000.nii.gz'),
        join(image_src_path, f'{stem}_0001.nii.gz'),
        join(image_dst_path, label_file),
    )

# One conversion job per label file; the label folder defines the case list.
label_files = listdir(label_src_path)

# Use the pool as a context manager so the worker processes are shut down when
# the cell finishes instead of lingering in the notebook kernel. pool.map()
# blocks until every file has been processed, so nothing is cut short when the
# pool is terminated on exit.
with Pool() as pool:
    pool.map(conv_folder_to_4d, label_files)
'''
Note: This was done on a 24 cpu-core computer and took about 20 minutes for 835 cases.
Consider if you'd rather just use a for-loop or fewer cores.
'''

In [None]:
# Reshaping all labels to have channel-first setup
# label_src_path: folder the original label files are read from.
# label_dst_path: folder the converted label files are written to, keeping
#                 the original file names.
label_src_path = '/homes/kovacs/project_data/hnc-auto-contouring/inner-eye/d_train_norm/labels'
label_dst_path = '/homes/kovacs/project_data/hnc-auto-contouring/MONAI/Task500_HNC01/labelsTr'

def conv_folder_to_4d(label_file):
    """Convert one label file and store it under the same name in the destination.

    Reads ``label_file`` from the module-level ``label_src_path`` and writes
    the result to the module-level ``label_dst_path``; the conversion itself
    is delegated to ``convert_label_to_channel_first``.

    NOTE: this rebinds the name ``conv_folder_to_4d`` that the PET/CT
    conversion cell also defines, so whichever cell ran last determines which
    function the name refers to.

    Args:
        label_file: File name (not a full path) of a label file.
    """
    convert_label_to_channel_first(
        join(label_src_path, label_file),
        join(label_dst_path, label_file),
    )

# One conversion job per label file found in the source folder.
label_files = listdir(label_src_path)

# Use the pool as a context manager so the worker processes are shut down when
# the cell finishes instead of lingering in the notebook kernel. pool.map()
# blocks until every file has been processed, so nothing is cut short when the
# pool is terminated on exit.
with Pool() as pool:
    pool.map(conv_folder_to_4d, label_files)

### Generate dataset.json
We can now generate a json file for the data set. It is generated based on the example shared by the swin-unetr guidelines at https://github.com/Project-MONAI/research-contributions/tree/main/SwinUNETR/BTCV, https://drive.google.com/file/d/1t4fIQQkONv7ArTSZe4Nucwkk1KfdUDvW/view.

We will not add test-cases as this is done separately.

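Before you run this, make sure that the data folders ./imagesTr and ./labelsTr are populated, and that a template dataset.json already exists in the destination folder (the cell below loads it and only replaces its training and validation entries).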

In [55]:
dataset_json_dst_folder = '/homes/kovacs/project_data/hnc-auto-contouring/MONAI/Task500_HNC01'
data_split_pickle = '/homes/kovacs/project_data/hnc-auto-contouring/nnUNet/nnUNet_preprocessed/Task500_HNC01/splits_final.pkl'


def _case_entries(cases):
    """Return one ``{'image': ..., 'label': ...}`` entry per case name.

    The paths are relative to the dataset folder and point into ./imagesTr and
    ./labelsTr respectively.
    """
    return [
        {'image': f'./imagesTr/{case}.nii.gz',
         'label': f'./labelsTr/{case}.nii.gz'}
        for case in cases
    ]


# Load the existing dataset.json; it serves as the template whose 'training'
# and 'validation' entries are replaced for every fold. The context manager
# makes sure the file handle is closed again.
with open(join(dataset_json_dst_folder, 'dataset.json'), encoding='utf-8') as f:
    dataset = json.load(f)

# Load the dataset split pickle file. It is indexed by fold number, and each
# fold provides the case names under the 'train' and 'val' keys.
# NOTE(review): pickle files can execute code on load; only use trusted files.
data_split = load_pickle(data_split_pickle)

# Write one dataset_<fold>.json per fold with that fold's training and
# validation cases.
for fold in range(len(data_split)):
    split = data_split[fold]
    train_cases = _case_entries(split['train'])
    val_cases = _case_entries(split['val'])

    dataset['training'] = train_cases
    dataset['validation'] = val_cases

    # Serialize before opening the output file, so that a serialization error
    # cannot leave a truncated JSON file behind.
    json_object = json.dumps(dataset, indent=4)

    with open(join(dataset_json_dst_folder, f"dataset_{fold}.json"), "w", encoding='utf-8') as outfile:
        outfile.write(json_object)