In [None]:
import glob
from pathlib import Path

import dicom2nifti
import numpy as np
import pandas as pd
import pydicom_seg
import SimpleITK as sitk
from pydicom import dcmread

In [None]:
# Load the series table that ships with the downloaded manifest.
metadata = pd.read_csv('../data/manifest-1603198545583/metadata.csv')

# Re-anchor every series folder at the manifest directory: the first character
# of the stored location is dropped and the manifest root is prepended.
# NOTE(review): presumably the dropped character is the '.' of a './...'
# relative path -- confirm against the CSV.
location_suffix = metadata['File Location'].str[1:]
metadata['File Location'] = '../data/manifest-1603198545583' + location_suffix

# One sub-table per series type.
# NOTE(review): the type codes ('SEG', 'RTSTRUCT', 'CT') are matched against
# the 'Manufacturer' column; presumably the columns of this CSV are offset
# relative to its header -- confirm before relying on other column names.
series_kind = metadata['Manufacturer']
SEG_metadata = metadata.loc[series_kind == 'SEG']
RTSTRUCT_metadata = metadata.loc[series_kind == 'RTSTRUCT']
CT_metadata = metadata.loc[series_kind == 'CT']

In [None]:
def get3D(paths):
    """Load a set of DICOM files and stack them into one 3D array.

    Every path is read with ``dcmread``. Datasets without a ``SliceLocation``
    attribute (e.g. scout views) are skipped; the remaining ones are ordered
    by ascending ``SliceLocation`` and stacked along the last axis.

    Parameters
    ----------
    paths : iterable
        Locations of the DICOM files to read.

    Returns
    -------
    numpy.ndarray
        Array whose shape is the first slice's ``pixel_array.shape`` followed
        by the number of kept slices. It is allocated with ``np.zeros`` and
        therefore has numpy's default float dtype.

    Raises
    ------
    ValueError
        If none of the files carries a ``SliceLocation``.
    """
    # load the DICOM files
    files = [dcmread(fname) for fname in paths]
    print("file count: {}".format(len(files)))

    # skip files with no SliceLocation (eg scout views)
    slices = [f for f in files if hasattr(f, 'SliceLocation')]
    print("skipped, no SliceLocation: {}".format(len(files) - len(slices)))

    # Fail with an explicit message instead of an opaque IndexError below.
    if not slices:
        raise ValueError("no DICOM slices with a SliceLocation were found")

    # ensure they are in the correct order
    slices.sort(key=lambda s: s.SliceLocation)

    # create 3D array, assuming all slices have the shape of the first one
    img_shape = list(slices[0].pixel_array.shape)
    img_shape.append(len(slices))
    img3d = np.zeros(img_shape)

    # fill 3D array with the images from the files
    for i, s in enumerate(slices):
        img3d[:, :, i] = s.pixel_array

    return img3d

In [None]:
# Create the output folders for the converted images and segmentations.
# parents=True also creates '../data/preprocessed' itself, and exist_ok=True
# makes the call a no-op when a folder is already there, so this cell can be
# re-run without errors.
for out_dir in ('../data/preprocessed/img', '../data/preprocessed/seg'):
    Path(out_dir).mkdir(parents=True, exist_ok=True)

In [None]:
# Switch off dicom2nifti's slice-increment validation for the conversions below.
# NOTE(review): presumably required because some series have inconsistent
# slice spacing and would otherwise be rejected -- confirm.
dicom2nifti.settings.disable_validate_slice_increment()

## CT scan

In [None]:
# Convert one CT series per subject to NIfTI.
# drop_duplicates keeps the first row of every 'Subject ID', i.e. the same row
# the previous per-subject lookup selected, but each subject is now converted
# only once and the table is no longer re-filtered on every iteration.
# The 'Data Description URI' value is used as the output file name and
# 'File Location' as the input DICOM folder.
ct_series = CT_metadata.drop_duplicates(subset='Subject ID')
for output_path, input_path in zip(ct_series['Data Description URI'], ct_series['File Location']):
    print(output_path, end=" ")
    dicom2nifti.dicom_series_to_nifti(input_path, f'../data/preprocessed/img/{output_path}', reorient_nifti=False)

## Segments

In [None]:
# Export every segment of each subject's SEG object as its own NIfTI file,
# named '<patient>-<segment number>-<segment description>.nii'.
# drop_duplicates keeps the first row of every 'Subject ID', i.e. the same row
# the previous per-subject lookup selected, without re-filtering the table on
# every iteration or exporting a subject more than once.
seg_series = SEG_metadata.drop_duplicates(subset='Subject ID')
for patient, series_dir in zip(seg_series['Data Description URI'], seg_series['File Location']):
    print(patient, end=" ")

    # Take the first file of the series folder; sorting makes the choice
    # deterministic because glob returns entries in arbitrary order.
    path = sorted(glob.glob(series_dir + '/*'))[0]
    dcm = dcmread(path)

    # Index the items of the Segment Sequence (0062,0002) by their
    # Segment Number (0062,0004), so each exported segment is labelled from
    # its own item rather than from its position in the sequence.
    segment_items = {item[0x0062, 0x0004].value: item for item in dcm[0x0062, 0x0002].value}

    reader = pydicom_seg.SegmentReader()
    result = reader.read(dcm)

    for segment_number in result.available_segments:
        # anat = anatomy, read from the Segment Description (0062,0006)
        anat = segment_items[segment_number][0x0062, 0x0006].value

        seg = result.segment_image(segment_number)  # lazy construction
        # third positional argument of WriteImage is useCompression
        sitk.WriteImage(seg, f'../data/preprocessed/seg/{patient}-{segment_number}-{anat}.nii', True)
