# Imports

In [2]:
!pip install SimpleITK --only-binary :all: 
!pip install nibabel --only-binary :all:

Collecting SimpleITK
  Downloading SimpleITK-2.2.0-cp38-cp38-win_amd64.whl (27.1 MB)
     ---------------------------------------- 27.1/27.1 MB 6.2 MB/s eta 0:00:00
Installing collected packages: SimpleITK
Successfully installed SimpleITK-2.2.0
Collecting nibabel
  Using cached nibabel-4.0.2-py3-none-any.whl (3.3 MB)
Installing collected packages: nibabel
Successfully installed nibabel-4.0.2


In [276]:
import os
import re
import json
import numpy as np
import nibabel as nib
import SimpleITK as sitk
from pathlib import Path
from collections import deque, defaultdict

In [277]:
def load_dicom(directory: str) -> np.ndarray:
    """Read the DICOM series stored in a directory into a NumPy volume.

    Args:
        directory: Folder containing the slices of one DICOM series. Any object
            convertible with ``str()`` (for example ``pathlib.Path``) is accepted.

    Returns:
        The voxel data converted to ``int16``, in the axis order produced by
        ``sitk.GetArrayFromImage`` (referred to as z, y, x in this notebook).

    Raises:
        RuntimeError: If no DICOM series files are found in ``directory``.
    """
    reader = sitk.ImageSeriesReader()
    dicom_names = reader.GetGDCMSeriesFileNames(str(directory))
    if not dicom_names:
        # Fail early with a readable message instead of an opaque reader error.
        raise RuntimeError(f"No DICOM series found in directory: {str(directory)!r}")
    reader.SetFileNames(dicom_names)
    image_itk = reader.Execute()

    # NOTE(review): the int16 cast assumes every stored value fits in that range;
    # values outside it would wrap silently — confirm against the dataset.
    image_zyx = sitk.GetArrayFromImage(image_itk).astype(np.int16)
    return image_zyx

# Collect input files and metadata

In [278]:
# Recursively collect the path of every file found under the data subset folder
list_files = [
    Path(folder) / name
    for folder, _subdirs, names in os.walk(Path("./data/subset/"))
    for name in names
]

In [291]:
# Build the per-case index: lung number -> {"img_metadata", "img", "mask"} path strings.
# The lung number is the run of digits that follows "LUNG1-" (case-insensitive) in the path.
lung_id_pattern = re.compile(r"((?<=LUNG1-)\d+)", flags=re.IGNORECASE)  # compiled once, not per file

lungs_data = defaultdict(dict)
for file in list_files:
    file = str(file)

    # Files inside Jupyter checkpoint folders are not part of the dataset.
    if ".ipynb_checkpoints" in file:
        continue

    extension = os.path.splitext(file)[-1]
    if extension not in (".json", ".gz"):
        continue

    id_match = lung_id_pattern.search(file)
    if id_match is None:
        # Report and skip instead of failing with AttributeError on `None.group`.
        print(f"Skipping file without a LUNG1-<number> id in its path: {file}")
        continue
    lung_num = int(id_match.group(0))

    if extension == ".json":
        # The image path is the JSON path with its ".json" suffix removed.
        lungs_data[lung_num]["img_metadata"] = file
        lungs_data[lung_num]["img"] = file[: -len(".json")]
    else:
        # Files ending in ".gz" are recorded as the mask of that lung.
        lungs_data[lung_num]["mask"] = file

Save the metadata to a JSON file

In [292]:
# Serialize the collected paths straight into the metadata file
with open("./data/metadata.json", "w") as json_file:
    json.dump(lungs_data, json_file)

Read metadata

In [293]:
# Reload the saved metadata from disk
with open("./data/metadata.json", "r") as json_file:
    metadata_raw = json.load(json_file)

# Convert the keys back to integer lung numbers
metadata = {int(lung_key): paths for lung_key, paths in metadata_raw.items()}

print("Lungs nums:")
print(*metadata.keys())

Lungs nums:
1 2 5 8 13 16 18 24 26 28


Load the image and mask for one case (lung 1)

In [298]:
# Load the image volume of lung 1 from its DICOM directory
img = load_dicom(directory=metadata[1]["img"])
print(type(img), img.shape)

# Load the mask of lung 1 and move its last axis to the front
# NOTE(review): load_dicom names its axes (z, y, x). If the NIfTI array is stored as
# (x, y, z), this transpose yields (z, x, y) — confirm the in-plane axes line up with
# `img`; identical shapes alone would not reveal a swap when both in-plane sizes are equal.
mask_nifti = nib.load(metadata[1]["mask"])
mask = np.transpose(mask_nifti.get_fdata(), (2, 0, 1))
print(type(mask), mask.shape)

<class 'numpy.ndarray'> (134, 512, 512)
<class 'numpy.ndarray'> (134, 512, 512)
