In [1]:
import matplotlib.pyplot as plt
from src.prepare_data import CropPreprocessor, ZoomPreprocessor, find_corresponding_scan

# Zoom-based preprocessor configured with output_dim=2, output_size=256 and
# margin=10, targeting the zoom010_2d output folder.
# NOTE(review): the meaning/units of output_size and margin are defined in
# src.prepare_data and are not visible in this notebook — confirm there.
zp = ZoomPreprocessor(
    output_folder = r'C:\Users\user\data\dl_radiomics\zoom010_2d',
    output_dim=2,
    output_size=256,
    margin=10
)

# One example segmentation file; the matching scan path is resolved by the
# project helper find_corresponding_scan (imported from src.prepare_data).
segmentation_path = r'D:\premium_data\amphia\monotherapy\split_segmentations\PREM_AM_001_0.nii.gz'
scan_path = find_corresponding_scan(segmentation_path)
# Input dict for the preprocessors: scan under "img", segmentation under
# "seg", both passed as plain strings. The crop cell below reuses `data`.
data = {"img": str(scan_path), "seg": str(segmentation_path)}

# Disabled preview: would run the zoom preprocessor on `data` and show
# output['img'][0], [1] and [2] side by side in a 1x3 figure.
# output = zp(data)

# fig, ax = plt.subplots(1,3, figsize=(12,4))
# for ix in range(3):
#     ax[ix].imshow(output['img'][ix])
# fig.show()

In [2]:
# Crop-based preprocessor configured with output_dim=3, output_size=128 and
# roi_size=50, targeting the crop050_3d output folder.
# NOTE(review): the meaning/units of output_size and roi_size are defined in
# src.prepare_data and are not visible in this notebook — confirm there.
cp = CropPreprocessor(
    output_folder = r'C:\Users\user\data\dl_radiomics\crop050_3d',
    output_dim=3,
    output_size=128,
    roi_size=50
)

# Apply the preprocessor to the `data` dict built in the first cell (that cell
# must have been run first). The logged output shows that this call writes
# PREM_AM_001_0.nii.gz into the crop050_3d folder.
output = cp(data)

2022-10-28 15:38:19,733 INFO image_writer.py:193 - writing: C:\Users\user\data\dl_radiomics\crop050_3d\PREM_AM_001_0.nii.gz


In [7]:
import os
from pathlib import Path

class DataPipeline:
    """Run a crop or zoom preprocessor over every segmentation in a set of folders.

    Results are written below ``output_root`` in a folder named after the
    settings: ``dim-<output_dim>_size-<output_size>_method-<method>_`` followed
    by the extra keyword arguments as ``<key>-<value>`` pairs joined with
    ``_``. Inside it, ``run`` creates one sub-folder per input folder, named
    after the input folder's grandparent directory.

    Args:
        input_folders: iterable of folders whose entries are segmentation files.
        output_root: directory below which the settings folder is created.
        output_dim: forwarded to the preprocessor.
        output_size: forwarded to the preprocessor.
        method: ``'crop'`` (CropPreprocessor) or ``'zoom'`` (ZoomPreprocessor).
        **kwargs: extra preprocessor arguments (e.g. ``roi_size`` or ``margin``);
            they are also encoded in the output folder name.

    Raises:
        ValueError: if ``method`` is neither ``'crop'`` nor ``'zoom'``.
    """

    def __init__(self, input_folders, output_root, output_dim, output_size, method, **kwargs):
        # Reject unknown methods up front, before any folder is created.
        # Previously an unknown method left `self.processor` undefined and the
        # mistake only surfaced as an AttributeError inside run().
        if method not in ('crop', 'zoom'):
            raise ValueError(f"method must be 'crop' or 'zoom', got {method!r}")

        self.input_folders = input_folders

        # Folder naming is kept exactly as before so existing outputs are found.
        output_folder_name = (
            f'dim-{output_dim}_size-{output_size}_method-{method}_'
            + '_'.join(f'{k}-{v}' for k, v in kwargs.items())
        )
        self.output_folder = Path(output_root) / output_folder_name
        self.output_folder.mkdir(parents=True, exist_ok=True)

        # The preprocessor starts without an output folder; run() assigns one
        # per input folder through set_output_folder().
        if method == 'crop':
            self.processor = CropPreprocessor(None, output_dim, output_size, **kwargs)
        else:
            self.processor = ZoomPreprocessor(None, output_dim, output_size, **kwargs)

    def run(self):
        """Process every entry of every input folder with the configured preprocessor."""
        for input_folder in self.input_folders:
            input_folder = Path(input_folder)
            # The grandparent directory name is used as the center name,
            # e.g. <center>/<therapy>/split_segmentations -> <center>.
            center = input_folder.parent.parent.name

            target_folder = self.output_folder / center
            target_folder.mkdir(parents=True, exist_ok=True)

            self.processor.set_output_folder(target_folder)

            # Sorted so the processing order is reproducible across runs.
            for segmentation_file in sorted(input_folder.iterdir()):
                scan_file = self.find_corresponding_scan(segmentation_file)
                # Paths are passed as plain strings, matching the direct
                # preprocessor calls elsewhere in this notebook.
                data = {'img': str(scan_file), 'seg': str(segmentation_file)}
                self.processor(data)

    def find_corresponding_scan(self, segmention_path):
        """Return the path of the scan that belongs to a segmentation file.

        The scan is looked up in the sibling ``scans`` folder of the folder
        holding the segmentation. Its name is the segmentation name up to the
        first ``.``, with a trailing ``_<digits>`` index of any length removed,
        plus ``.nii.gz``. When the name has no such index, the last two
        characters are dropped instead (the previous behaviour).

        Assumes segmentation files are named ``<scan name>_<index>.<ext>`` —
        TODO confirm against the data layout.
        """
        segmention_path = Path(segmention_path)
        scan_folder = segmention_path.parent.parent / "scans"

        stem = segmention_path.name.split(".")[0]
        head, sep, index = stem.rpartition("_")
        if sep and index.isdigit():
            # Handles multi-digit indices, which a fixed two-character cut broke.
            scan_stem = head
        else:
            scan_stem = stem[:-2]

        return scan_folder / (scan_stem + ".nii.gz")




# Build a crop pipeline (output_dim=3, output_size=128, roi_size=10) over the
# amphia split_segmentations folder. Constructing it already creates the
# output folder dim-3_size-128_method-crop_roi_size-10 under the output root;
# no data is processed here because run() is not called in this cell.
dp = DataPipeline(
    [r"D:\premium_data\amphia\monotherapy\split_segmentations"], 
    r'C:\Users\user\data\dl_radiomics', 3, 128, method='crop', roi_size=10)

In [9]:
# Center identifiers, one string per center.
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated by Python, which previously merged 'mst' and 'radboud'
# into the single bogus entry 'mstradboud'.
CENTERS = [
    'amphia',
    'isala',
    'lumc',
    'maxima',
    'mst',
    'radboud',
    'umcu',
    'vumc',
    'zuyderland',
]