In [1]:
from pathlib import Path
from tqdm.notebook import tqdm

# Import the Importer class
from eyened_orm.importer.importer import Importer
from eyened_orm import DBManager
from eyened_orm.utils.config import get_config

# from rtnls_fundusprep.preprocessor import FundusPreprocessor
# from rtnls_fundusprep.utils import preprocess_for_inference
from tqdm.notebook import tqdm

In [None]:
# Load the "test" configuration, initialise the database manager with it,
# and open the session that is passed to the Importer and ORM queries below.
config = get_config("test")
DBManager.init(config)
session = DBManager.get_session()

In [3]:
# Set up project information
# This name is passed to the Importer as the project to import into.
project_name = "FAU Fundus Dataset"

In [4]:
# Root directory of the extracted dataset; images are read from its "images" subdirectory.
# NOTE: this is a machine-specific absolute path - adjust it to your local copy of the data.
extract_dir = Path("/mnt/oogergo/eyened/public_data/av_segmentation/HRF_AV")

In [5]:
# Get all image paths in a deterministic (sorted) order.
# Directory listings come back in an arbitrary, filesystem-dependent order.
# The images are later grouped into "patients" by their position in this list,
# so sorting keeps that grouping reproducible across runs and machines.
images_dir = extract_dir / "images"
image_paths = sorted(images_dir.glob("*"))

print(f"Found {len(image_paths)} images.")

Found 45 images.


In [6]:
# Build the nested input structure expected by the Importer:
#   patient -> studies -> series -> images
# Even though in this dataset each image is of a different patient,
# we create one "patient" for every `batch_size` images to demonstrate the hierarchy.
from datetime import date


data = []
batch_size = 10

# Resolve the study date once so that every study gets the same date,
# even if the loop happens to run across midnight.
study_date = date.today()

for i in range(0, len(image_paths), batch_size):
    batch_images = image_paths[i : i + batch_size]

    # One patient per batch with an explicit identifier (Patient_1, Patient_2, ...),
    # holding a single study with a single series that contains the batch's images.
    patient_item = {
        "patient_identifier": f"Patient_{i // batch_size + 1}",
        "studies": [
            {
                "study_date": study_date,
                "series": [
                    {
                        "images": [
                            {
                                "image": str(img_path.absolute()),
                                # Store the original file stem under the "OldPath" property.
                                "props": {"OldPath": img_path.stem},
                            }
                            for img_path in batch_images
                        ],
                    }
                ],
            }
        ],
    }

    data.append(patient_item)

print(f"Created data structure with {len(data)} patients.")
if data:
    print(f"First patient has {len(data[0]['studies'][0]['series'][0]['images'])} images.")
else:
    # Without this guard an empty image list would fail with an IndexError on data[0].
    print("No images found - check that extract_dir points at the dataset.")

Created data structure with 5 patients.
First patient has 10 images.


### Summaries

The `summary` method returns a summary of the ORM objects that would be created, without writing anything to the database.

In [7]:
# Create the importer with default settings.
# None of the objects exist in the database yet, and the defaults are
# create_series=True, create_studies=False, create_patients=False,
# so the summary is expected to fail. The error is caught and printed (rather
# than left to abort the cell) so the notebook still runs from top to bottom.
importer = Importer(
    session=session,
    project_name=project_name,
    run_ai_models=True,
    generate_thumbnails=True,
)

try:
    # Display a summary of what will be imported
    import_summary = importer.summary(data)
except RuntimeError as exc:
    print(f"RuntimeError: {exc}")

RuntimeError: Patient with identifier 'Patient_1' not found and create_patients=False

In [8]:
# The patients and studies described in `data` do not exist in the database yet,
# so we need to specify create_patients=True, create_studies=True
# to allow the importer to create them.
importer = Importer(
    session=session,
    project_name=project_name,
    create_patients=True,
    create_studies=True,
    run_ai_models=False, 
    generate_thumbnails=True,
)

# Display a summary of what will be imported
import_summary = importer.summary(data)


Import Summary for Project: FAU Fundus Dataset
----------------  Object Statistics  ----------------
  Entity  Total  New  Existing
Patients      5    5         0
 Studies      5    5         0
  Series      5    5         0
  Images     45   45         0
-----------  Column Population Statistics  -----------
- only for new entities
- values set to NULL are not considered populated

Populated Patient Columns:
           Column  Populated Percentage
PatientIdentifier          5     100.0%

Populated Study Columns:
   Column  Populated Percentage
StudyDate          5     100.0%

Populated Series Columns:
No populated columns found

Populated Image Columns:
           Column  Populated Percentage
DatasetIdentifier         45     100.0%
          OldPath         45     100.0%


### Execution

Once satisfied with the summary, commit the import to the database with `importer.exec`.
Exceptions may still be raised during the import, in which case nothing will change in the DB and no files will be written.

Post-insertion scripts will run after insertion for non-essential steps such as:

- Thumbnail generation (highly recommended to run)
- Running image preprocessing scripts (e.g. CFI bounds detection)
- Running AI models which populate DB columns
- Hashing the files for error and duplicate checks

In [9]:
# Commit the import to the database and run the post-insertion steps.
# The cell output ends with a Project repr, so exec appears to return the project.
importer.exec(data)

                                                                             

Created 45 image instances


                                                                       

Successfully committed to database


Running post-processing:  75%|███████▌  | 3/4 [00:00<00:00,  4.11it/s]

Found 46 images without thumbnails



  0%|          | 0/46 [00:00<?, ?it/s]
  2%|▏         | 1/46 [00:00<00:13,  3.37it/s]
  4%|▍         | 2/46 [00:00<00:11,  3.93it/s]
  7%|▋         | 3/46 [00:00<00:10,  4.23it/s]
  9%|▊         | 4/46 [00:00<00:09,  4.39it/s]
 11%|█         | 5/46 [00:01<00:09,  4.55it/s]
 13%|█▎        | 6/46 [00:01<00:09,  4.02it/s]
 15%|█▌        | 7/46 [00:01<00:11,  3.29it/s]
 17%|█▋        | 8/46 [00:02<00:11,  3.31it/s]
 20%|█▉        | 9/46 [00:02<00:09,  3.87it/s]
 22%|██▏       | 10/46 [00:02<00:08,  4.27it/s]
 26%|██▌       | 12/46 [00:03<00:11,  3.00it/s]
 28%|██▊       | 13/46 [00:03<00:10,  3.23it/s]
 30%|███       | 14/46 [00:03<00:09,  3.49it/s]
 33%|███▎      | 15/46 [00:04<00:08,  3.51it/s]
 35%|███▍      | 16/46 [00:04<00:07,  3.82it/s]
 37%|███▋      | 17/46 [00:04<00:07,  4.12it/s]
 39%|███▉      | 18/46 [00:04<00:06,  4.20it/s]
 41%|████▏     | 19/46 [00:05<00:07,  3.80it/s]
 43%|████▎     | 20/46 [00:05<00:07,  3.55it/s]
 46%|████▌     | 21/46 [00:05<00:06,  3.65it/s]
 48%|████

Project(12035, FAU Fundus Dataset, 1)

### Inspect Results

The project is now in the DB. We can easily generate a dataframe with image details to inspect results

In [10]:
# Inspect the project using the ORM.
# Look it up via `project_name` so the lookup always targets the same project
# that was passed to the Importer, instead of repeating the name literal.
from eyened_orm import Project


project = Project.by_name(session, project_name)

In [11]:
# Build a table of the project's images for inspection; the output lists one row
# per image with its patient, study and series ids and the file path.
project.make_dataframe(session)

Unnamed: 0,image_id,patient_id,patient_identifier,study_id,study_date,series_id,path
0,2515790,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...
1,2515791,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...
2,2515792,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...
3,2515793,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...
4,2515794,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...
5,2515795,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...
6,2515796,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...
7,2515797,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...
8,2515798,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...
9,2515799,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...


### Updating existing projects

Images can be inserted into existing projects, patients, studies and series by passing an existing project name, patient_identifier, study_date or series_id in the input structure. These will be matched to database entities (taking into account their nested structure). 

In [12]:
# To simulate inserting into an existing project, we'll insert the same data again.
# No create_* flags are passed, so the defaults apply (create_patients=False,
# create_studies=False, create_series=True): the patients and studies must already
# exist and are matched, while new series are created because the input provides
# no series_id. To insert into existing series instead, provide series_id in the
# input and pass create_series=False so that no new objects will be created.
# copy_files=True will copy the files to a configurable directory
importer = Importer(
    session=session,
    project_name=project_name,
    run_ai_models=True, 
    generate_thumbnails=True,
    copy_files=True
)

# Display a summary of what will be imported
import_summary = importer.summary(data)

Import Summary for Project: FAU Fundus Dataset
----------------  Object Statistics  ----------------
  Entity  Total  New  Existing
Patients      5    0         5
 Studies      5    0         5
  Series      5    5         0
  Images     45   45         0
-----------  Column Population Statistics  -----------
- only for new entities
- values set to NULL are not considered populated

Populated Series Columns:
No populated columns found

Populated Image Columns:
           Column  Populated Percentage
DatasetIdentifier         45     100.0%
          OldPath         45     100.0%


In [13]:
# Commit the second import: the same images are inserted again under the
# existing patients and studies, in newly created series.
importer.exec(data)

                                                                             

Created 45 image instances


                                                                       

Successfully committed to database


Running post-processing:  80%|████████  | 4/5 [00:05<00:02,  2.40s/it]

No images to process
Found 45 images without thumbnails



  0%|          | 0/45 [00:00<?, ?it/s]
  2%|▏         | 1/45 [00:00<00:08,  4.90it/s]
  4%|▍         | 2/45 [00:00<00:09,  4.61it/s]
  7%|▋         | 3/45 [00:00<00:09,  4.60it/s]
  9%|▉         | 4/45 [00:00<00:08,  4.81it/s]
 11%|█         | 5/45 [00:01<00:08,  4.66it/s]
 13%|█▎        | 6/45 [00:01<00:08,  4.51it/s]
 16%|█▌        | 7/45 [00:01<00:07,  4.90it/s]
 18%|█▊        | 8/45 [00:01<00:07,  4.81it/s]
 20%|██        | 9/45 [00:01<00:08,  4.50it/s]
 22%|██▏       | 10/45 [00:02<00:08,  4.07it/s]
 24%|██▍       | 11/45 [00:02<00:08,  3.84it/s]
 27%|██▋       | 12/45 [00:02<00:07,  4.21it/s]
 29%|██▉       | 13/45 [00:03<00:08,  3.95it/s]
 31%|███       | 14/45 [00:03<00:07,  4.40it/s]
 33%|███▎      | 15/45 [00:03<00:07,  3.95it/s]
 38%|███▊      | 17/45 [00:03<00:06,  4.55it/s]
 40%|████      | 18/45 [00:04<00:06,  4.37it/s]
 42%|████▏     | 19/45 [00:04<00:05,  4.35it/s]
 44%|████▍     | 20/45 [00:04<00:07,  3.43it/s]
 47%|████▋     | 21/45 [00:05<00:07,  3.39it/s]
 49%|████

Project(12035, FAU Fundus Dataset, 1)

In [14]:
# Reload the project (via `project_name`, the same name given to the Importer)
# and list its images again; the table now also contains the second import.
project = Project.by_name(session, project_name)
project.make_dataframe(session)

Unnamed: 0,image_id,patient_id,patient_identifier,study_id,study_date,series_id,path
0,2515790,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...
1,2515791,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...
2,2515792,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...
3,2515793,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...
4,2515794,1696747,Patient_1,1733668,2025-04-10,2003539,/mnt/oogergo/eyened/public_data/av_segmentatio...
...,...,...,...,...,...,...,...
85,2515875,1696751,Patient_5,1733672,2025-04-10,2003548,/mnt/oogergo/eyened/misc/197411f47c743cedebd4d...
86,2515876,1696751,Patient_5,1733672,2025-04-10,2003548,/mnt/oogergo/eyened/misc/ff079a12ea468e4a1843b...
87,2515877,1696751,Patient_5,1733672,2025-04-10,2003548,/mnt/oogergo/eyened/misc/7004f016d2a0e3ea27891...
88,2515878,1696751,Patient_5,1733672,2025-04-10,2003548,/mnt/oogergo/eyened/misc/7c64c16a7de76f9e3187a...
