This notebook presents a dummy example of a segmentation pipeline to show what we expect as submission file.
We assume that the model is trained on 3D images resampled at 1 $mm^3$ and that the input shape of the model is 
[batch_size, x_dim, y_dim, z_dim, channel]. The last dimension (channel) is 2 and represents the CT and PT images.
The output image is a binary segmentation, and its shape is [batch_size, x_dim, y_dim, z_dim, 1].

In [None]:
from pathlib import Path
import shutil

import numpy as np
import pandas as pd
import SimpleITK as sitk
from tqdm import tqdm

In [None]:
test_folder = Path("../data/hecktor2021_test/hecktor_nii").resolve()
results_folder = Path("../data/dummy_segmentation_results/").resolve()
results_folder.mkdir(exist_ok=True)

bbox_df = pd.read_csv("../data/hecktor2021_test/hecktor2021_bbox_testing.csv").set_index("PatientID")

In [16]:
def dummy_model(x):
    return np.random.uniform(size=x.shape[:4] + (1, )) 

In [17]:
patient_list = [f.name[:7] for f in test_folder.rglob("*_ct.nii.gz")]

In [18]:
# Instantiating the resampler
resampling_spacing = np.array([1.0, 1.0, 1.0])
pre_resampler = sitk.ResampleImageFilter()
pre_resampler.SetInterpolator(sitk.sitkBSpline)
pre_resampler.SetOutputSpacing(resampling_spacing)

post_resampler = sitk.ResampleImageFilter()
post_resampler.SetInterpolator(sitk.sitkNearestNeighbor)

In [20]:
test_folder = Path("/mnt/faststorage/jintao/HNSCC/hecktor2021_test/hecktor_nii/").resolve()
pred_folder = Path("/mnt/faststorage/jintao/nnUNet/nnUNet_results/nnUNet/3d_fullres/Task221_hecktor_baseline/nnUNetTrainerV2__nnUNetPlansv2.1/test_nii/").resolve()
results_folder = Path("/mnt/faststorage/jintao/nnUNet/nnUNet_results/nnUNet/3d_fullres/Task221_hecktor_baseline/nnUNetTrainerV2__nnUNetPlansv2.1/submission/").resolve()


for p_id in patient_list:
    # loading the images and storing the ct spacing
    image_ct = sitk.ReadImage(str(test_folder / (p_id + "_ct.nii.gz")))
    image_pt = sitk.ReadImage(str(test_folder / (p_id + "_pt.nii.gz")))
    spacing_ct = image_ct.GetSpacing()

    # getting the bounding box
    bb = np.squeeze(
        np.array([
            bbox_df.loc[p_id, ["x1", "y1", "z1", "x2", "y2", "z2"]],
        ]))

    # resampling the images
    resampled_size = np.round(
        (bb[3:] - bb[:3]) / resampling_spacing).astype(int)
    pre_resampler.SetOutputOrigin(bb[:3])
    pre_resampler.SetSize([int(k)
                           for k in resampled_size])  # sitk requires this
    image_ct = pre_resampler.Execute(image_ct)
    image_pt = pre_resampler.Execute(image_pt)

    # sitk to numpy, sitk stores images with [dim_z, dim_y, dim_x]
    array_ct = np.transpose(sitk.GetArrayFromImage(image_ct), (2, 1, 0))
    array_pt = np.transpose(sitk.GetArrayFromImage(image_pt), (2, 1, 0))

    # ... apply your preprocessing here


    x = np.stack([array_ct, array_pt], axis=-1)
    x = x[np.newaxis, ...]  # adding batch dimension
    segmentation = dummy_model(x)[0, :, :, :, 0]
    print(x.shape)
    
    # do not forget to threshold your output
    segmentation = (segmentation < 0.5).astype(np.uint8)

    # numpy to sitk
    image_segmentation = sitk.GetImageFromArray(
        np.transpose(segmentation, (2, 1, 0)))

    image_segmentation.SetOrigin(bb[:3])
    image_segmentation.SetSpacing(resampling_spacing)

    # If you do not resample to the orginal CT resolution,
    # the following nearest neighbor resampling will be applied to your submission.
    # We encourage you to try other resampling methods that are more suited to
    # binary mask.
    final_size = np.round((bb[3:] - bb[:3]) / spacing_ct).astype(int)
    post_resampler.SetOutputSpacing(spacing_ct)
    post_resampler.SetOutputOrigin(bb[:3])
    post_resampler.SetSize([int(k) for k in final_size])  # sitk requires this

    image_segmentation = post_resampler.Execute(image_segmentation)

    # Saving the prediction
    sitk.WriteImage(
        image_segmentation,
        str(results_folder / (p_id + ".nii.gz")),
    )


(1, 144, 144, 144, 2)
(1, 144, 144, 144, 2)
(1, 144, 144, 144, 2)
(1, 144, 144, 144, 2)


KeyboardInterrupt: 

In [None]:
# zip the folder and it is ready for submission
shutil.make_archive("../data/dummy_segmentation_submission", "zip", results_folder)

In [None]:
np.unique(image_segmentation)

## my code here:

In [27]:
from pathlib import Path
import shutil
import numpy as np
import pandas as pd
import SimpleITK as sitk
from tqdm import tqdm

test_folder = Path("/mnt/faststorage/jintao/HNSCC/hecktor2021_test/hecktor_nii/").resolve()
pred_folder = Path("/mnt/faststorage/jintao/nnUNet/nnUNet_results/nnUNet/3d_fullres/Task221_hecktor_baseline/nnUNetTrainerV2__nnUNetPlansv2.1/test_nii/").resolve()
results_folder = Path("/mnt/faststorage/jintao/nnUNet/nnUNet_results/nnUNet/3d_fullres/Task221_hecktor_baseline/nnUNetTrainerV2__nnUNetPlansv2.1/submission/").resolve()
#results_folder.mkdir(exist_ok=True)

bbox_df = pd.read_csv("/mnt/faststorage/jintao/HNSCC/hecktor2021_test/hecktor2021_bbox_testing.csv").set_index("PatientID")
patient_list = [f.name[:7] for f in test_folder.rglob("*_ct.nii.gz")]

In [30]:
from pathlib import Path
import shutil
import numpy as np
import pandas as pd
import SimpleITK as sitk
from tqdm import tqdm

test_folder = Path("/mnt/faststorage/jintao/HNSCC/hecktor2021_test/hecktor_nii/").resolve()
pred_folder = Path("/mnt/faststorage/jintao/nnUNet/nnUNet_results/nnUNet/3d_fullres/Task221_hecktor_baseline/nnUNetTrainerV2__nnUNetPlansv2.1/test_nii/").resolve()
results_folder = Path("/mnt/faststorage/jintao/nnUNet/nnUNet_results/nnUNet/3d_fullres/Task221_hecktor_baseline/nnUNetTrainerV2__nnUNetPlansv2.1/submission/").resolve()
#results_folder.mkdir(exist_ok=True)

bbox_df = pd.read_csv("/mnt/faststorage/jintao/HNSCC/hecktor2021_test/hecktor2021_bbox_testing.csv").set_index("PatientID")
patient_list = [f.name[:7] for f in test_folder.rglob("*_ct.nii.gz")]
resampling_spacing = np.array([1.0, 1.0, 1.0])
post_resampler = sitk.ResampleImageFilter()
post_resampler.SetInterpolator(sitk.sitkNearestNeighbor)
patient_list = sorted(patient_list)
for p_id in tqdm(patient_list):
    # loading the images and storing the ct spacing
    image_ct = sitk.ReadImage(str(test_folder / (p_id + "_ct.nii.gz")))
    image_pt = sitk.ReadImage(str(test_folder / (p_id + "_pt.nii.gz")))
    spacing_ct = image_ct.GetSpacing()
    final_size = image_ct.GetSize()
    # getting the bounding box
    bb = np.squeeze(
        np.array([
            bbox_df.loc[p_id, ["x1", "y1", "z1", "x2", "y2", "z2"]],
        ]))
    #bb_df = bbox_df
    #bb = np.array([
    #    bb_df.loc[p_id, 'x1'], bb_df.loc[p_id, 'y1'], bb_df.loc[p_id, 'z1'],
    #    bb_df.loc[p_id, 'x2'], bb_df.loc[p_id, 'y2'], bb_df.loc[p_id, 'z2']
    #])

    # numpy to sitk
    image_segmentation = sitk.ReadImage(str(pred_folder / (p_id + ".nii.gz")))
    

    #image_segmentation.SetOrigin(bb[:3])
    #image_segmentation.SetSpacing(resampling_spacing)

    # If you do not resample to the orginal CT resolution,
    # the following nearest neighbor resampling will be applied to your submission.
    # We encourage you to try other resampling methods that are more suited to
    # binary mask.
    final_size = np.round((bb[3:] - bb[:3]) / spacing_ct).astype(int)
    post_resampler.SetOutputSpacing(spacing_ct)
    post_resampler.SetOutputOrigin(image_ct.GetOrigin())
    post_resampler.SetSize([int(k) for k in final_size])  # sitk requires this

    image_segmentation = post_resampler.Execute(image_segmentation)

    # Saving the prediction
    sitk.WriteImage(
        image_segmentation,
        str(results_folder / (p_id + ".nii.gz")),
    )


100%|██████████| 101/101 [04:54<00:00,  2.91s/it]
