In [7]:
import sys

sys.path.append(".../pipeline_imaging/imaging")  #'/path/to/your/project'

from imaging_maestro2_triton_root import Maestro2_Triton
import imaging_utils
import imaging_classifying_rules
import pydicom
import matplotlib.pyplot as plt
import os
from PIL import Image
import numpy as np
from pydicom.pixel_data_handlers.util import convert_color_space
from tqdm import tqdm
import pandas as pd
import json
import shutil

In [14]:
import os


def get_modified_json_filenames(folder_path):
    """Collect normalized names of the JSON files found under ``folder_path``.

    The directory tree is walked recursively. Files whose name ends in
    ``.json`` and does not start with a dot are kept. For each one, the last
    8 characters of the file name are dropped and every ``_`` and ``.`` is
    removed from what remains.

    NOTE(review): 8 characters is more than the 5 of ".json"; presumably the
    metadata files end in something like "dcm.json" so the result lines up
    with the DICOM names -- TODO confirm against the real file naming.

    Args:
        folder_path: Root directory to search.

    Returns:
        list[str]: One normalized name per matching file, in ``os.walk`` order.
    """
    normalized_names = []
    for _dirpath, _dirnames, filenames in os.walk(folder_path):
        normalized_names.extend(
            name[:-8].replace("_", "").replace(".", "")
            for name in filenames
            # Keep visible JSON files only (skip dot-prefixed names).
            if name.endswith(".json") and not name.startswith(".")
        )
    return normalized_names


def get_modified_dicom_filenames(folder_path):
    """Collect normalized names of the DICOM files found under ``folder_path``.

    The directory tree is walked recursively. Files whose name ends in
    ``.dcm`` (case-sensitive) and does not start with a dot are kept. For each
    one, the 4-character ``.dcm`` extension is dropped and every ``_`` and
    ``.`` is removed from what remains.

    Args:
        folder_path: Root directory to search.

    Returns:
        list[str]: One normalized name per matching file, in ``os.walk`` order.
    """
    return [
        # Strip the ".dcm" extension, then remove separators.
        name[:-4].replace("_", "").replace(".", "")
        for _dirpath, _dirnames, filenames in os.walk(folder_path)
        for name in filenames
        # Keep visible DICOM files only (skip dot-prefixed names).
        if name.endswith(".dcm") and not name.startswith(".")
    ]


def get_json_filenames(folder_path):
    """Return the paths of all visible JSON files found under ``folder_path``.

    The directory tree is walked recursively. A file is included when its
    name ends in ``.json`` and does not start with a dot.

    Args:
        folder_path: Root directory to search.

    Returns:
        list[str]: Paths built by joining each walked directory with the
        matching file name, in ``os.walk`` order.
    """
    matches = []
    for dirpath, _dirnames, filenames in os.walk(folder_path):
        for name in filenames:
            # Skip dot-prefixed files and anything that is not JSON.
            if name.startswith(".") or not name.endswith(".json"):
                continue
            matches.append(os.path.join(dirpath, name))
    return matches

# double-check that the JSON metadata file names match the DICOM file names

In [None]:
# Root folders for the per-image JSON metadata and for the DICOM images.
metadata_folder = "...stage-1-container/AI-READI/completed/imaging-metadata"
imaging_folder = "...stage-1-container/AI-READI/completed/imaging"
dicom_files = get_modified_dicom_filenames(imaging_folder)
json_files = get_modified_json_filenames(metadata_folder)

# Compare the two collections of normalized names as sets. Sorting first is
# unnecessary because set equality ignores order (and duplicates).
json_names = set(json_files)
dicom_names = set(dicom_files)
names_match = json_names == dicom_names
print(names_match)

# On a mismatch, report how many names are missing on each side so the
# discrepancy can be investigated; nothing extra is printed when they match.
if not names_match:
    print(f"metadata names without a DICOM file: {len(json_names - dicom_names)}")
    print(f"DICOM names without a metadata file: {len(dicom_names - json_names)}")

# retinal photography manifest creation

In [None]:
retinal_photography = "retinal_photography"

# Columns copied from every metadata record into the manifest, in output order.
photography_columns = [
    "participant_id",
    "manufacturer",
    "manufacturers_model_name",
    "laterality",
    "anatomic_region",
    "imaging",
    "height",
    "width",
    "color_channel_dimension",
    "sop_instance_uid",
    "filepath",
]

files = get_json_filenames(f"{metadata_folder}/{retinal_photography}")
if not files:
    # pd.concat([]) would fail later with an opaque "No objects to concatenate".
    raise ValueError(
        f"No JSON metadata files found under {metadata_folder}/{retinal_photography}"
    )

data = []

# Iterate in sorted order so the manifest does not depend on the order in
# which os.walk happens to list directories.
for json_file in sorted(files):
    # Read JSON as UTF-8 explicitly instead of relying on the locale default,
    # and release the file handle before doing the DataFrame work.
    with open(json_file, "r", encoding="utf-8") as file:
        json_data = json.load(file)

    # Each top-level value becomes one row; the top-level keys are discarded.
    # NOTE(review): assumes every value is a dict of fields -- TODO confirm.
    df = pd.DataFrame(list(json_data.values()))

    # Selecting by name raises KeyError if a file lacks an expected column.
    df_filtered = df[photography_columns]

    data.append(df_filtered)

# Concatenate all per-file DataFrames into one manifest table.
final_df = pd.concat(data, ignore_index=True)
# A stable sort keeps the (sorted) file order within each participant_id.
final_df = final_df.sort_values(by="participant_id", kind="mergesort")

final_df.to_csv(
    "...stage-1-container/AI-READI/completed/imaging/retinal_photography/manifest.tsv",
    sep="\t",
    index=False,
)

# retinal oct manifest creation (must be run after the retinal_photography manifest has been created)

In [17]:
retinal_oct = "retinal_oct"
input_op = (
    "...stage-1-container/AI-READI/completed/imaging/retinal_photography/manifest.tsv"
)

# Load the retinal photography manifest (TSV); it must already exist on disk.
input_df = pd.read_csv(input_op, sep="\t")

# Lookup from photography sop_instance_uid to photography filepath. It is the
# same for every OCT file, so build it once rather than inside the loop.
uid_to_filepath = input_df.set_index("sop_instance_uid")["filepath"]

# Columns copied from every metadata record into the manifest, in output order.
oct_columns = [
    "participant_id",
    "manufacturer",
    "manufacturers_model_name",
    "anatomic_region",
    "imaging",
    "laterality",
    "height",
    "width",
    "number_of_frames",
    "pixel_spacing",
    "slice_thickness",
    "sop_instance_uid",
    "filepath",
    "reference_retinal_photography_image_instance_uid",
]

files = get_json_filenames(f"{metadata_folder}/{retinal_oct}")
if not files:
    # pd.concat([]) would fail later with an opaque "No objects to concatenate".
    raise ValueError(
        f"No JSON metadata files found under {metadata_folder}/{retinal_oct}"
    )

# Read JSON files and make one DataFrame per file.
data = []

# Iterate in sorted order so the manifest does not depend on the order in
# which os.walk happens to list directories.
for json_file in sorted(files):
    # Read JSON as UTF-8 explicitly instead of relying on the locale default,
    # and release the file handle before doing the DataFrame work.
    with open(json_file, "r", encoding="utf-8") as file:
        json_data = json.load(file)

    # Each top-level value becomes one row; the top-level keys are discarded.
    # NOTE(review): assumes every value is a dict of fields -- TODO confirm.
    df = pd.DataFrame(list(json_data.values()))

    df_filtered = (
        # Selecting by name raises KeyError if a file lacks an expected column.
        df[oct_columns]
        # Add "reference_filepath" by looking up the referenced photography
        # UID in the photography manifest; UIDs with no match become NaN.
        .assign(
            reference_filepath=lambda frame: frame[
                "reference_retinal_photography_image_instance_uid"
            ].map(uid_to_filepath)
        )
        # Shorten the reference column name for the manifest.
        .rename(
            columns={
                "reference_retinal_photography_image_instance_uid": "reference_instance_uid"
            }
        )
    )

    data.append(df_filtered)


# Concatenate all per-file DataFrames into one manifest table.
final_df = pd.concat(data, ignore_index=True)
# A stable sort keeps the (sorted) file order within each participant_id.
final_df = final_df.sort_values(by="participant_id", kind="mergesort")


final_df.to_csv(
    "...stage-1-container/AI-READI/completed/imaging/retinal_oct/manifest.tsv",
    sep="\t",
    index=False,
)

# retinal flio manifest creation

In [None]:
retinal_flio = "retinal_flio"

# Columns copied from every metadata record into the manifest, in output order.
flio_columns = [
    "participant_id",
    "manufacturer",
    "manufacturers_model_name",
    "laterality",
    "wavelength",
    "height",
    "width",
    "number_of_frames",
    "sop_instance_uid",
    "filepath",
]

files = get_json_filenames(f"{metadata_folder}/{retinal_flio}")
if not files:
    # pd.concat([]) would fail later with an opaque "No objects to concatenate".
    raise ValueError(
        f"No JSON metadata files found under {metadata_folder}/{retinal_flio}"
    )

# Read JSON files and make one DataFrame per file.
data = []

# Iterate in sorted order so the manifest does not depend on the order in
# which os.walk happens to list directories.
for json_file in sorted(files):
    # Read JSON as UTF-8 explicitly instead of relying on the locale default,
    # and release the file handle before doing the DataFrame work.
    with open(json_file, "r", encoding="utf-8") as file:
        json_data = json.load(file)

    # Each top-level value becomes one row; the top-level keys are discarded.
    # NOTE(review): assumes every value is a dict of fields -- TODO confirm.
    df = pd.DataFrame(list(json_data.values()))

    # Selecting by name raises KeyError if a file lacks an expected column.
    df_filtered = df[flio_columns]

    data.append(df_filtered)

# Concatenate all per-file DataFrames into one manifest table.
final_df = pd.concat(data, ignore_index=True)
# A stable sort keeps the (sorted) file order within each participant_id.
final_df = final_df.sort_values(by="participant_id", kind="mergesort")

final_df.to_csv(
    "...stage-1-container/AI-READI/completed/imaging/retinal_flio/manifest.tsv",
    sep="\t",
    index=False,
)