In [2]:
import os
import json
from pathlib import Path
import SimpleITK as sitk

# Numeric id used to name the `Dataset<ID>` folder under nnUNet_raw.
DATASET_ID = '369'

# Working tree: the `nnunet` folder that sits next to the current working directory.
SRC_FOLDER = os.path.join(Path().resolve().parent, 'nnunet')
os.chdir(SRC_FOLDER)

# Create the raw-data layout: nnUNet_raw/Dataset<ID>/{imagesTr,labelsTr,imagesTs}.
os.makedirs('nnUNet_raw', exist_ok=True)
_dataset_root = f'{SRC_FOLDER}/nnUNet_raw/Dataset{DATASET_ID}'
os.makedirs(_dataset_root, exist_ok=True)
for _subfolder in ('imagesTr', 'labelsTr', 'imagesTs'):
    os.makedirs(f'{_dataset_root}/{_subfolder}', exist_ok=True)

# Export the three folder locations as environment variables; each variable
# is named after the folder it points to inside SRC_FOLDER.
for _env_name in ('nnUNet_raw', 'nnUNet_preprocessed', 'nnUNet_results'):
    os.environ[_env_name] = f"{SRC_FOLDER}/{_env_name}"

In [3]:
def find_git_root(path: Path) -> "Path | None":
    """Locate the root of the git repository that contains ``path``.

    Checks ``path`` itself and then each of its ancestors, nearest first,
    and returns the first one that has a ``.git`` entry.

    Args:
        path: Location to start searching from.

    Returns:
        The first directory at or above ``path`` that contains ``.git``,
        or ``None`` when no such directory exists.
    """
    # `path.parents` does not include `path` itself, so it is checked
    # explicitly; otherwise starting at the repository root would miss it.
    for candidate in (path, *path.parents):
        if (candidate / ".git").exists():
            return candidate

    return None

def spacing_dict(repo_root: str) -> dict[str, list]:
    """Load the per-scan spacing table used when building 3D images.

    Reads ``<repo_root>/data/metadata/spacing_mm.txt``. Every non-empty line
    is expected to look like ``<scan id>: <Python literal>`` (for example
    ``41: [0.7, 0.7, 3.0]``). Blank lines are ignored.

    Args:
        repo_root: Repository root that contains ``data/metadata``.

    Returns:
        Mapping from the scan id, zero-padded to at least two characters,
        to the parsed literal from that line.

    Raises:
        ValueError: If a non-empty line has no ``:`` separator, or its value
            is not a plain Python literal.
        SyntaxError: If a value cannot be parsed at all.
    """
    import ast  # local import: only this function needs it

    file_path = os.path.join(repo_root, 'data', 'metadata', 'spacing_mm.txt')

    data = {}

    with open(file_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            stripped = line.strip()
            if not stripped:
                # Tolerate blank / trailing empty lines.
                continue

            id_part, separator, array_part = stripped.partition(':')
            if not separator:
                raise ValueError(
                    f"{file_path}:{line_number}: expected '<id>: <values>', got {stripped!r}"
                )

            id_key = id_part.strip().zfill(2)

            # literal_eval only accepts literals, so file content can never
            # execute code (the previous `eval` call could).
            data[id_key] = ast.literal_eval(array_part.strip())

    return data

In [4]:
def convert_2d_to_3d(repo_root: str, src_dir_path: str, scan_id: str, dest_dir_path: str) -> None:
    """Stack the 2D PNG slices of one scan into a single 3D ``.nii.gz`` image.

    The slices in ``<src_dir_path>/<scan_id>`` are ordered by the integer
    before the first ``.`` in their file name, joined into one volume, given
    the spacing listed for the scan by ``spacing_dict(repo_root)``, and
    written to ``dest_dir_path``.

    Output file name (scan id zero-padded to three characters):
        * ``<id>.nii.gz`` when the destination folder name starts with
          ``labels`` (e.g. ``labelsTr``, ``labelsTs``);
        * ``<id>_0000.nii.gz`` otherwise.

    Args:
        repo_root: Repository root, forwarded to ``spacing_dict``.
        src_dir_path: Folder that contains one sub-folder per scan.
        scan_id: Name of the scan's sub-folder.
        dest_dir_path: Folder the 3D image is written to.

    Raises:
        ValueError: If the scan folder contains no ``.png`` files.
        KeyError: If the spacing table has no entry for the scan.
    """
    spacing_by_scan = spacing_dict(repo_root)

    # Collect this scan's slices in numeric order (plain string sorting would
    # put "10.png" before "2.png").
    scan_id_dir_path = os.path.join(src_dir_path, scan_id)
    slice_file_names = sorted(
        (f for f in os.listdir(scan_id_dir_path) if f.endswith('.png')),
        key=lambda name: int(name.split('.')[0]),
    )
    if not slice_file_names:
        raise ValueError(f"No .png slices found in '{scan_id_dir_path}'")

    scan_id_image_slices = [
        sitk.ReadImage(os.path.join(scan_id_dir_path, name))
        for name in slice_file_names
    ]

    # Combine the slices into one 3D image and attach the scan's spacing.
    image_3d = sitk.JoinSeries(scan_id_image_slices)
    # spacing_dict() zero-pads its keys to two characters; pad the lookup key
    # the same way so single-digit scan ids resolve as well.
    spacing = spacing_by_scan[str(scan_id).zfill(2)]
    image_3d.SetSpacing([spacing[0], spacing[1], spacing[2]])

    # Decide the output name from the destination folder itself rather than a
    # substring of the whole path, so every `labels*` folder (not only
    # `labelsTr`) gets label-style names without the `_0000` suffix.
    case_id = str(scan_id).zfill(3)
    dest_folder_name = os.path.basename(os.path.normpath(dest_dir_path))
    if dest_folder_name.startswith('labels'):
        output_image_name = f"{case_id}.nii.gz"
    else:
        output_image_name = f"{case_id}_0000.nii.gz"

    output_3d_image_path = os.path.join(dest_dir_path, output_image_name)
    sitk.WriteImage(image_3d, output_3d_image_path)

In [5]:
# Source folder under <repo>/data  ->  destination folder inside the dataset.
folder_dict = {
    'val_images': 'imagesTs',
    'val_labels': 'labelsTs'
}
REPO_ROOT = find_git_root(Path().resolve())
if REPO_ROOT is None:
    # Fail with a clear message instead of the TypeError that
    # os.path.join(None, ...) would raise on the next line.
    raise RuntimeError(
        f"Could not locate the git repository root from '{Path().resolve()}'"
    )
DATASET_DIR = os.path.join(REPO_ROOT, 'data')
OUTPUT_DIR_PATH = os.path.join(SRC_FOLDER, 'nnUNet_raw', f'Dataset{DATASET_ID}')

for src_folder, dest_folder in folder_dict.items():
    print(f'Converting {src_folder} to {dest_folder}')

    src_dir_path = os.path.join(DATASET_DIR, src_folder)
    dest_dir_path = os.path.join(OUTPUT_DIR_PATH, dest_folder)

    os.makedirs(dest_dir_path, exist_ok=True)

    # Scan folders are the entries without a dot in their name; sort them
    # numerically so that e.g. '9' comes before '10'.
    scan_ids_list = sorted(
        (entry for entry in os.listdir(src_dir_path) if '.' not in entry),
        key=int,
    )
    for scan_id in scan_ids_list:
        print(f"'{scan_id}' to 3D image")
        convert_2d_to_3d(
            repo_root=REPO_ROOT,
            src_dir_path=src_dir_path,
            scan_id=scan_id,
            dest_dir_path=dest_dir_path
        )

Converting val_images to imagesTs
'41' to 3D image
'42' to 3D image
'43' to 3D image
'44' to 3D image
'45' to 3D image
'46' to 3D image
'47' to 3D image
'48' to 3D image
'49' to 3D image
'50' to 3D image
Converting val_labels to labelsTs
'41' to 3D image
'42' to 3D image
'43' to 3D image
'44' to 3D image
'45' to 3D image
'46' to 3D image
'47' to 3D image
'48' to 3D image
'49' to 3D image
'50' to 3D image


In [97]:
# Label names in label-value order: the name at position i is written with
# integer value i in the "labels" section below.
_label_names = (
    "background",
    "gallbladder",
    "stomach",
    "esophagus",
    "right kidney",
    "right adrenal gland",
    "left adrenal gland",
    "liver",
    "left Kidney",
    "aorta",
    "spleen",
    "inferior vena cava",
    "pancreas",
)

# Content of the dataset description file written next to the image folders.
dataset_info_dict = {
    "channel_names": {"0": "CT"},
    "labels": {name: value for value, name in enumerate(_label_names)},
    "numTraining": 40,
    "file_ending": ".nii.gz",
}

# Serialise first, then write the finished text to <dataset folder>/dataset.json.
dataset_json_path = os.path.join(OUTPUT_DIR_PATH, 'dataset.json')
with open(dataset_json_path, 'w') as f:
    f.write(json.dumps(dataset_info_dict))