In [1]:
import sys

sys.path.append(".../pipeline_imaging/imaging")  #'/path/to/your/project'

from imaging_cirrus_root import Cirrus
import imaging_utils
import imaging_classifying_rules
import os

# Folder structure

In [2]:
# Pooled date folders across three sites, after fda processing in the Windows VM
# step1 (pooled)/
# └── Cirrus/
#     ├── UAB_Cirrus_20231201-20231231_1.fda.zip
#     ├── UAB_Cirrus_20231201-20231231_3.fda.zip
#     └── ... (additional zip folders)

# => unzip each folder =>

# folders are unzipped, and ".fda.zip" is replaced with "_fda" in each folder name
# step2/
# └── Cirrus/
#     ├── UW_Cirrus_20231204-20240105_1_fda
#     ├── UW_Cirrus_20231204-20240105_2_fda
#     └── ... (additional unzipped folders)
#         ├──── AIREADI-1075_350x350_OD_20231204_114218_SRLAngioEnface.dcm
#         ├──── AIREADI-1075_350x350_OD_20231204_114218_SRLAngioEnface.dcm
#         └── ... (additional DICOM files)

#  => cirrus_instance.organize =>

# DICOM files are organized by protocol into 4 subfolders, plus two subfolders for unknown protocol and critical info missing (if any)
# protocol names are added to folders and dcm files inside that folder
# step3/
# └── Cirrus/
#     ├── cirrus_mac_angiography
#     ├── cirrus_mac_macular_cube
#     ├── cirrus_onh_angiography
#     ├── cirrus_onh_optic_disc_cube
#     └── unknown_protocol
#         ├──── unknown_protocol_AIREADI_R_UAB_Maestro2_UAB_Maestro2_20240629-20240706_7001-M-39_fda
#         └──── unknown_protocol_AIREADI_L_UAB_Maestro2_UAB_Maestro2_20240629-20240706_7001-M-39_fda
#                 ├──── unknown_protocol_AIREADI_L_UAB_Maestro2_UAB_Maestro2_20240629-20240706_7001-M-39_fda_2.16.840.1.114517.10.5.1.4.307064520230724155117.1.1.dcm
#                 ├──── unknown_protocol_AIREADI_L_UAB_Maestro2_UAB_Maestro2_20240629-20240706_7001-M-39_fda_2.16.840.1.114517.10.5.1.4.307064520230724155117.2.1.dcm
#                 └── ... (additional DICOM files)

#  => cirrus_instance.convert =>

# DICOM files are formatted to be NEMA compliant (only for the 4 known protocols), still organized by protocol. No conversion for unknown_protocol and critical_info_missing
# "converted_" is added to the file names, and now there is a list of DICOM files in each folder
# step4/
# └── Cirrus/
#     ├── cirrus_mac_angiography
#     ├── cirrus_mac_macular_cube
#     ├── cirrus_onh_angiography
#     └── cirrus_onh_optic_disc_cube
#         ├──── converted_cirrus_mac_angiography_AIREADI-1075_R_AIREADI-1075_350x350_OD_20231204_114218_AVAngioEnfaceProjectionRemoved.dcm
#         ├──── converted_cirrus_mac_angiography_AIREADI-1075_R_AIREADI-1075_350x350_OD_20231204_114218_AVAngioEnfaceProjectionRemoved.dcm
#         ├──── converted_cirrus_mac_angiography_AIREADI-1075_R_AIREADI-1075_350x350_OD_20231204_114218_AVAngioEnfaceProjectionRemoved.dcm
#         └── ... (additional DICOM files)


#  => imaging_utils.format_file => (this process is universal to all images)

# De-identified again, files renamed, and organized in a final structure for data release
# step5/
#  └──retinal_photography/
#     └─── ir/
#             └── zeiss_cirrus
#                 ├── 1001
#                 ├── 1002
#                 └───1075
#                        ├── 1075_cirrus_disc_6x6_octa_ir_r_1.2.276.0.75.2.2.42.215507196755019.20231204114306829.5628829040.dcm
#                        └── 1075_cirrus_disc_oct_ir_r_1.2.276.0.75.2.2.42.215507196755019.20231204114423547.5628910920.dcm
#                        └── ... (additional DICOM files)
#  └──retinal_oct/
#     └─── oct_structural_scan/
#             └── zeiss_cirrus
#                 ├── 1001
#                 ├── 1002
#                 └─── 1075
#                        ├── 1075_cirrus_disc_6x6_octa_oct_r_1.2.276.0.75.2.2.42.215507196755019.20231204114306766.5628382640.dcm
#                        └── 1075_cirrus_disc_oct_oct_r_1.2.276.0.75.2.2.42.215507196755019.20231204114423500.5628643170.dcm
#                        └── ... (additional DICOM files)

#  └──retinal_octa/
#     └─── enface/
#           └── zeiss_cirrus
#     └─── flow_cube/
#           └── zeiss_cirrus
#     └─── segmentation/
#           └── zeiss_cirrus
#                 ├── 1001
#                 ├── 1002
#                 └─── 1075
#                        ├── 1075_cirrus_disc_6x6_octa_segmentation_r_1.2.276.0.75.2.2.42.215507196755019.20231204114306829.5628829041.dcm
#                        └── 1075_cirrus_disc_6x6_octa_segmentation_r_1.2.276.0.75.2.2.42.215507196755019.20231204114306829.5628829041.dcm

# For this, we have to add extra tags to pydicom's in-memory DICOM dictionary (you can run this multiple times, it will lead to the same outcome)

In [3]:
from pydicom.datadict import DicomDictionary, keyword_dict
from pydicom.dataset import Dataset


# Extra tags to add to pydicom's dictionary.
# Each value is a 5-tuple: (VR, VM, description, is_retired flag, keyword).
# The is_retired flag is left as an empty string.
new_dict_items = {
    0x0022EEE0: ("SQ", "1", "En Face Volume Descriptor Sequence", "", "EnFaceVolumeDescriptorSequence"),
    0x0022EEE1: ("CS", "1", "En Face Volume Descriptor Scope", "", "EnFaceVolumeDescriptorScope"),
    0x0022EEE2: ("SQ", "1", "Referenced Segmentation Sequence", "", "ReferencedSegmentationSequence"),
    0x0022EEE3: ("FL", "1", "Surface Offset", "", "SurfaceOffset"),
}

# Forward mapping (tag -> entry): extend the main dictionary in place.
DicomDictionary.update(new_dict_items)

# Reverse mapping (keyword -> tag): the keyword is the fifth element of each entry.
new_names_dict = {entry[4]: tag for tag, entry in new_dict_items.items()}
keyword_dict.update(new_names_dict)




# Cirrus instance

In [None]:
# One shared Cirrus object; the organize, convert, and metadata cells below call methods on it.
cirrus_instance = Cirrus()

# Unzip each folder, same name

In [None]:
# step1 holds the pooled zip folders; step2 receives the unzipped output.
step1 = ".../step1"
step2 = ".../step2"
device = "Cirrus"

# Ask the helper for the zip files under the device folder of step1,
# and print the result as a quick check before unzipping.
zips = imaging_utils.list_zip_files(f"{step1}/{device}")

print(zips)

# The loop variable is intentionally not named `zip`: in a notebook the loop
# variable stays in the kernel namespace and would shadow the builtin zip()
# for every cell that runs afterwards.
for zip_path in zips:
    unzip_file = imaging_utils.unzip_fda_file(zip_path, step2)

# Organize folders by protocol

In [None]:
step2 = ".../step2"
step3 = ".../step3"
device = "Cirrus"

# Input: the device folder under step2; output: the device folder under step3.
unzipped_root = f"{step2}/{device}"
organized_root = f"{step3}/{device}"

folders = imaging_utils.list_subfolders(unzipped_root)

# Every subfolder is handed to organize() with the same output root.
for unzipped_folder in folders:
    organize_result = cirrus_instance.organize(unzipped_folder, organized_root)

# Convert files


In [None]:
# step3 holds the protocol-organized folders; step4 receives the converted files.
step3 = ".../step3"
step4 = ".../step4"
device = "Cirrus"

# Only these protocol folders are converted; any other folder under
# step3/<device> is not visited by this cell.
protocols = [
    "cirrus_mac_angiography",
    "cirrus_mac_macular_cube",
    "cirrus_onh_angiography",
    "cirrus_onh_optic_disc_cube",
]

for protocol in protocols:
    # One output folder per protocol. exist_ok=True makes re-running the cell
    # safe without a separate (racy) os.path.exists() check.
    output = f"{step4}/{device}/{protocol}"
    os.makedirs(output, exist_ok=True)

    folders = imaging_utils.list_subfolders(f"{step3}/{device}/{protocol}")

    # Each subfolder of the protocol is converted into the same output folder.
    for folder in folders:
        convert_result = cirrus_instance.convert(folder, output)

# Files are de-identified again, renamed, and organized in a final structure for data release

In [None]:
import imaging_utils

step4 = ".../step4"
step5 = ".../step5"
device = "Cirrus"
metadata_folder = ".../step_metadata"

# Input root (device folder under step4) and output root (device folder under step5).
converted_root = f"{step4}/{device}"
formatted_root = f"{step5}/{device}"
device_list = [converted_root]

for device_folder in device_list:
    # Each file name returned by the helper is formatted into the output root;
    # the path returned by format_file is then passed on to metadata().
    for file_path in imaging_utils.get_filtered_file_names(device_folder):
        full_file_path = imaging_utils.format_file(file_path, formatted_root)
        cirrus_instance.metadata(full_file_path, metadata_folder)