In [1]:
import monai
import torch
import glob
import shutil
import os
from tqdm import tqdm
from monai.data import DataLoader, Dataset, load_decathlon_datalist
from monai.data.utils import no_collation
from monai.transforms import (
    Compose,
    EnsureChannelFirstd,
    EnsureTyped,
    LoadImaged,
    Orientationd,
    SaveImaged,
    Spacingd,
)

In [2]:
# Print the installed MONAI version and the versions of its required and
# optional dependencies, so the environment used for this run is recorded
# next to the results.
monai.config.print_config()

MONAI version: 1.2.0
Numpy version: 1.24.4
Pytorch version: 2.0.1+cu118
MONAI flags: HAS_EXT = False, USE_COMPILED = False, USE_META_DICT = False
MONAI rev id: c33f1ba588ee00229a309000e888f9817b4f1934
MONAI __file__: /usr/local/lib/python3.8/dist-packages/monai/__init__.py

Optional dependencies:
Pytorch Ignite version: NOT INSTALLED or UNKNOWN VERSION.
ITK version: 5.3.0
Nibabel version: 5.1.0
scikit-image version: 0.21.0
Pillow version: 10.0.0
Tensorboard version: 2.14.0
gdown version: NOT INSTALLED or UNKNOWN VERSION.
TorchVision version: 0.15.2+cu118
tqdm version: 4.66.1
lmdb version: NOT INSTALLED or UNKNOWN VERSION.
psutil version: 5.9.5
pandas version: 2.0.3
einops version: NOT INSTALLED or UNKNOWN VERSION.
transformers version: NOT INSTALLED or UNKNOWN VERSION.
mlflow version: 2.6.0
pynrrd version: 1.0.0

For details about installing the optional dependencies, please visit:
    https://docs.monai.io/en/latest/installation.html#installing-the-recommended-dependencies



### Splitting HC exams and masks

In [3]:
# Disabled one-off step, kept for reference: collect every '*-label.nrrd'
# mask found one directory below /data/HC/, copy it into a flat folder, and
# rename the copy after the directory it came from.
# NOTE(review): the split('/') indices assume the exact layout
# '/data/HC/<case>/<file>' (index 3 = case directory, index 4 = file name);
# they must be adjusted if the glob pattern changes depth.
# NOTE(review): dest_dir presumably has to exist before running — confirm.
# MASK_PATH = r'/data/HC/*/*-label.nrrd'
# mask_list = glob.glob(MASK_PATH)
# print(len(mask_list))
# print(mask_list[0])

# ### creating copies
# dest_dir = '/data/HC-masks/'
# for i in tqdm(range(len(mask_list))):
#     src_file = mask_list[i]
#     shutil.copy(src_file, dest_dir)

#     dst_file = os.path.join(dest_dir, mask_list[i].split('/')[4])
#     new_dst_file_name = os.path.join(
#         dest_dir,
#         mask_list[i].split('/')[3] + '.nrrd'
#     )
#     os.rename(dst_file, new_dst_file_name)

In [4]:
# Disabled one-off step, kept for reference: collect every exam volume
# matching '303*.nrrd' one directory below /data/HC/, copy it into a flat
# folder, and rename the copy after the directory it came from.
# NOTE(review): the split('/') indices assume the exact layout
# '/data/HC/<case>/<file>' (index 3 = case directory, index 4 = file name);
# they must be adjusted if the glob pattern changes depth.
# NOTE(review): dest_dir presumably has to exist before running — confirm.
# EXAM_PATH = r'/data/HC/*/303*.nrrd'
# exam_list = glob.glob(EXAM_PATH)
# print(len(exam_list))
# print(exam_list[0])

# ### creating copies
# dest_dir = '/data/HC-exams/'
# for i in tqdm(range(len(exam_list))):
#     src_file = exam_list[i]
#     shutil.copy(src_file, dest_dir)

#     dst_file = os.path.join(dest_dir, exam_list[i].split('/')[4])
#     new_dst_file_name = os.path.join(
#         dest_dir,
#         exam_list[i].split('/')[3] + '.nrrd'
#     )
#     os.rename(dst_file, new_dst_file_name)

### perform conversion

In [5]:
# --- Run configuration -------------------------------------------------------
# Exactly one value per setting is active; the commented lines are the values
# used for the other datasets. Switch all three paths together.

# Input: JSON data list naming the files of each split.
data_list_file_path = "/data/output/hc_train_val3.json"
# data_list_file_path = "/data/output/hc_test3.json"
# data_list_file_path = "/data/output/msd_train_val2.json"
# data_list_file_path = "/data/output/msd_test2.json"
# data_list_file_path = "/data/output/LUNA16_datasplit/mhd_original/dataset_fold0.json"

# Input: directory the data-list entries are resolved against.
orig_data_base_dir = "/data/HC-exams/"
# orig_data_base_dir = "/data/HC-masks/"
# orig_data_base_dir = "/data/MSD-exams/"
# orig_data_base_dir = "/data/MSD-masks/"
# orig_data_base_dir = "/data/LUNA16_Images/"

# Target voxel size handed to the resampling transform as `pixdim`
# (presumably millimetres per axis — TODO confirm).
spacing = [0.703125, 0.703125, 1.25]

# Output: directory the resampled volumes are written to.
data_base_dir = "/data/HC_Images_resample/"
# data_base_dir = "/data/HC_Masks_resample/"
# data_base_dir = "/data/MSD_Images_resample/"
# data_base_dir = "/data/MSD_Masks_resample/"
# data_base_dir = "/data/LUNA16_Images_resample2/"

In [6]:
# Loading / resampling pipeline applied to every data-list entry:
#   1. read the "image" file through the ITK reader,
#   2. move the channel axis to the front,
#   3. cast the voxel data to float16,
#   4. reorient the volume to RAS axis order,
#   5. resample to the voxel size given by `spacing`.
process_transforms = Compose(
    [
        LoadImaged(
            keys=["image"],
            meta_key_postfix="meta_dict",
            reader="itkreader",
            affine_lps_to_ras=True,
            # Stated explicitly instead of relying on the default: MONAI 1.2
            # warns that the default flips to True in 1.3, and the saving step
            # is configured with meta_keys="image_meta_dict", which is only
            # populated when image_only is False.
            image_only=False,
        ),
        EnsureChannelFirstd(keys=["image"]),
        # NOTE(review): float16 presumably chosen to keep memory/disk use low;
        # confirm the reduced precision is acceptable for the intensity range.
        EnsureTyped(keys=["image"], dtype=torch.float16),
        Orientationd(keys=["image"], axcodes="RAS"),
        Spacingd(keys=["image"], pixdim=spacing, padding_mode="border"),
    ]
)

monai.transforms.io.dictionary LoadImaged.__init__:image_only: Current default value of argument `image_only=False` has been deprecated since version 1.1. It will be changed to `image_only=True` in version 1.3.


In [7]:
# Writer stage: stores the processed "image" entry under `data_base_dir`.
# The empty postfix leaves the file stem unchanged, and resample=False writes
# the array as-is rather than resampling it again on save.
_image_writer = SaveImaged(
    keys="image",
    meta_keys="image_meta_dict",
    output_dir=data_base_dir,
    output_postfix="",
    resample=False,
)

post_transforms = Compose([_image_writer])

In [8]:
# Resample every case listed under each split of the data list and write the
# result to disk. To process a test data list instead, iterate over ["test"].
for data_list_key in ["training", "validation"]:
    # Resolve this split's entries against the directory of original volumes.
    process_data = load_decathlon_datalist(
        data_list_file_path,
        is_segmentation=True,
        data_list_key=data_list_key,
        base_dir=orig_data_base_dir,
    )
    process_ds = Dataset(
        data=process_data,
        transform=process_transforms,
    )
    # One case per batch, no collation: each batch is iterated item by item
    # below, so volumes of different sizes never need to be stacked.
    process_loader = DataLoader(
        process_ds,
        batch_size=1,
        shuffle=False,
        pin_memory=False,
        collate_fn=no_collation,
    )

    print("-" * 10)
    # Each case takes tens of seconds, so show progress per split.
    for batch_data in tqdm(process_loader, desc=data_list_key):
        for batch_data_i in batch_data:
            # Called for its side effect (writing the file); the returned
            # dictionary is not needed afterwards.
            post_transforms(batch_data_i)

----------
2023-09-16 15:12:41,418 INFO image_writer.py:197 - writing: /data/HC_Images_resample/PL606729492858408/PL606729492858408.nii.gz
2023-09-16 15:13:13,069 INFO image_writer.py:197 - writing: /data/HC_Images_resample/PL278029848115415/PL278029848115415.nii.gz
2023-09-16 15:13:41,902 INFO image_writer.py:197 - writing: /data/HC_Images_resample/PL926008951016659/PL926008951016659.nii.gz
2023-09-16 15:14:18,728 INFO image_writer.py:197 - writing: /data/HC_Images_resample/PL259838368407140/PL259838368407140.nii.gz
2023-09-16 15:15:03,566 INFO image_writer.py:197 - writing: /data/HC_Images_resample/PL883384319849708/PL883384319849708.nii.gz
2023-09-16 15:15:45,248 INFO image_writer.py:197 - writing: /data/HC_Images_resample/PL824109098946531/PL824109098946531.nii.gz
2023-09-16 15:16:23,318 INFO image_writer.py:197 - writing: /data/HC_Images_resample/PL421132963270314/PL421132963270314.nii.gz
2023-09-16 15:17:01,877 INFO image_writer.py:197 - writing: /data/HC_Images_resample/PL210874