In [1]:
import base64
import json
import os
from datetime import date
from pathlib import Path

import pandas as pd
import requests
from tqdm.notebook import tqdm

from utils import download_and_extract_dataset

# API configuration
# Each value can be overridden through an environment variable so that real
# credentials never need to be saved inside the notebook. The fallbacks are
# the placeholder values; adjust them (or export the variables) for your server.
API_BASE_URL = os.environ.get("EYENED_API_URL", "http://eyened-gpu:2222/api")
API_USERNAME = os.environ.get("EYENED_API_USERNAME", "admin")
API_PASSWORD = os.environ.get("EYENED_API_PASSWORD", "CHANGE_ME")

# Create authentication header (HTTP Basic: base64 of "username:password")
auth_str = f"{API_USERNAME}:{API_PASSWORD}"
# UTF-8 instead of ASCII: identical bytes for ASCII-only credentials, but no
# UnicodeEncodeError when the username or password has non-ASCII characters.
auth_bytes = auth_str.encode('utf-8')
base64_auth = base64.b64encode(auth_bytes).decode('ascii')
headers = {
    "Authorization": f"Basic {base64_auth}",
    "Content-Type": "application/json"
}

In [5]:
# Fetch the HRF archive and unpack it into the local 'hrfav' folder.
# The call stays the last expression so its return value is shown as the cell output.
hrf_archive_url = 'https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/all.zip'
download_and_extract_dataset(hrf_archive_url, 'hrfav')

Downloading https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/all.zip...


100%|██████████| 72.8M/72.8M [00:00<00:00, 84.4MB/s]


Extracting to hrfav...
Download and extraction complete!


'hrfav'

In [3]:
# Folder the dataset was extracted into, and the project name sent to the import API
extract_dir = Path("./hrfav")
project_name = "HRFAV Fundus Dataset"

In [4]:
# Get all image paths
images_dir = extract_dir / "images"
# Both extension spellings are globbed. The results are merged through a set so
# a file matched by both patterns (possible where glob matching is
# case-insensitive) is only listed once, and sorted because glob order is
# arbitrary -- the patient grouping built from this list must be reproducible.
image_paths = sorted({*images_dir.glob("*.jpg"), *images_dir.glob("*.JPG")})

print(f"Found {len(image_paths)} images.")

Found 45 images.


In [5]:
# Build the nested patient -> study -> series -> image structure for the Importer.
# In this dataset every image belongs to a different patient; purely to
# demonstrate the hierarchy, `batch_size` consecutive images are grouped under
# one synthetic patient with an explicit identifier ("Patient_1", "Patient_2", ...).
data = []
batch_size = 4

for batch_start in range(0, len(image_paths), batch_size):
    batch_images = image_paths[batch_start : batch_start + batch_size]

    # One patient with a single study (dated today) holding a single series
    patient_item = {
        "patient_identifier": f"Patient_{batch_start // batch_size + 1}",
        "studies": [
            {
                "study_date": date.today().isoformat(),  # Convert date to ISO format string for JSON
                "series": [
                    {
                        "images": [
                            {
                                "image": str(img_path.absolute()),
                                "props": {
                                    "OldPath": img_path.stem,
                                    # important! laterality is currently required by the viewer.
                                    # It is a placeholder here: it simply alternates with the
                                    # image's position inside its batch (even -> "R", odd -> "L").
                                    "Laterality": "R" if position % 2 == 0 else "L"
                                },
                            }
                            for position, img_path in enumerate(batch_images)
                        ],
                    }
                ],
            }
        ],
    }

    data.append(patient_item)

print(f"Created data structure with {len(data)} patients.")
# Guard against an empty image list: data[0] would raise IndexError
if data:
    print(f"First patient has {len(data[0]['studies'][0]['series'][0]['images'])} images.")

Created data structure with 11 patients.
First patient has 4 images.


### Summaries

The summary endpoint returns a summary of what would be imported, without writing anything to the database.

In [6]:
# Request a summary using the importer's default creation settings.
# None of the patients/studies exist yet, so the defaults
# (create_series=True, create_studies=False, create_patients=False) produce an error.
default_options = {
    "project_name": project_name,
    "run_ai_models": True,
    "generate_thumbnails": True,
}
payload = {"data": data, "options": default_options}

# Ask the summary endpoint what the import would do
response = requests.post(f"{API_BASE_URL}/import/summary", headers=headers, json=payload)

# Report HTTP-level failures first, then the application-level result
if response.status_code != 200:
    print(f"Error: {response.status_code}")
    print(response.text)
else:
    result = response.json()
    if not result["success"]:
        print(f"Error: {result['error']}")
        if result.get("stack_trace"):
            print("Stack trace:")
            print(result["stack_trace"])
    else:
        print("Summary generated successfully:")
        print(json.dumps(result["data"], indent=2))

Error: Patient with identifier 'Patient_1' not found and create_patients=False


In [8]:
# Enable create_patients and create_studies so that patients and studies
# missing from the database are created instead of rejected.
create_options = {
    "project_name": project_name,
    "create_patients": True,
    "create_studies": True,
    "run_ai_models": False,
    "generate_thumbnails": True,
    "include_stack_trace": True,
}
payload = {"data": data, "options": create_options}

# Ask the summary endpoint what the import would do
response = requests.post(f"{API_BASE_URL}/import/summary", headers=headers, json=payload)

# Report HTTP-level failures first, then the application-level result
if response.status_code != 200:
    print(f"Error: {response.status_code}")
    print(response.text)
else:
    result = response.json()
    if not result["success"]:
        print(f"Error: {result['error']}")
        if result.get("stack_trace"):
            print("Stack trace:")
            print(result["stack_trace"])
    else:
        print("Summary generated successfully:")
        summary = result["data"]
        # Overall counts per entity type
        print(pd.DataFrame(summary["general_stats"]).to_string(index=False))
        # One table per entity type that has populated columns
        for name, stats in summary["column_stats"].items():
            if not stats:
                continue
            print(f"\nPopulated {name.capitalize()} Columns:")
            df_columns = pd.DataFrame(stats)
            print(df_columns.to_string(index=False))

Summary generated successfully:
  Entity  Total  New  Existing  New_Percentage  Existing_Percentage
Patients     11   11         0           100.0                  0.0
 Studies     11   11         0           100.0                  0.0
  Series     11   11         0           100.0                  0.0
  Images     44   44         0           100.0                  0.0

Populated Patients Columns:
           Column  Populated  Percentage
PatientIdentifier         11       100.0

Populated Studies Columns:
   Column  Populated  Percentage
StudyDate         11       100.0

Populated Images Columns:
           Column  Populated  Percentage
DatasetIdentifier         44       100.0
       Laterality         44       100.0
          OldPath         44       100.0


### Execution

Once satisfied with the summary, commit the import to the database with the exec endpoint.
Exceptions may still be raised during the import; in that case nothing changes in the DB and no files are written.

Post-insertion scripts will run after insertion for non-essential steps such as:

- Thumbnail generation (highly recommended to run)
- Running image preprocessing scripts (e.g. CFI bounds detection)
- Running AI models which populate DB columns
- Hashing the files for error and duplicate checks

In [9]:
# Commit the import: send the payload used for the summary to the exec endpoint
response = requests.post(f"{API_BASE_URL}/import/exec", headers=headers, json=payload)

# Report HTTP-level failures first, then the application-level result
if response.status_code != 200:
    print(f"Error: {response.status_code}")
    print(response.text)
else:
    result = response.json()
    if not result["success"]:
        print(f"Error: {result['error']}")
        if result.get("stack_trace"):
            print("Stack trace:")
            print(result["stack_trace"])
    else:
        print("Import completed successfully:")
        print(json.dumps(result["data"], indent=2))

Import completed successfully:
{
  "project_name": "FAU Fundus Dataset"
}


### Inspect Results

The project is now in the DB. We can use the viewer to inspect.

### Updating existing projects

Images can be inserted into existing projects, patients, studies and series by passing an existing project name, patient_identifier, study_date or series_id in the input structure. These will be matched to database entities (taking into account their nested structure).

In [9]:
# Simulate inserting into an existing project by submitting the same data again.
# Use create_patients=False, create_studies=False, create_series=False when inserting
# into existing objects to ensure that no new objects will be created.
# copy_files=True will copy the files to a configurable directory.
reimport_options = {
    "project_name": project_name,
    "run_ai_models": True,
    "generate_thumbnails": True,
    "copy_files": True,
}
payload = {"data": data, "options": reimport_options}

# First, get a summary of what would be imported
response = requests.post(f"{API_BASE_URL}/import/summary", headers=headers, json=payload)

# Report HTTP-level failures first, then the application-level result
if response.status_code != 200:
    print(f"Error: {response.status_code}")
    print(response.text)
else:
    result = response.json()
    if not result["success"]:
        print(f"Error: {result['error']}")
        if result.get("stack_trace"):
            print("Stack trace:")
            print(result["stack_trace"])
    else:
        print("Summary generated successfully:")
        print(json.dumps(result["data"], indent=2))

Error: importer_copy_path must be set when copy_files is True


In [None]:
# Commit the re-import by sending the current payload to the exec endpoint
response = requests.post(f"{API_BASE_URL}/import/exec", headers=headers, json=payload)

# Report HTTP-level failures first, then the application-level result
if response.status_code != 200:
    print(f"Error: {response.status_code}")
    print(response.text)
else:
    result = response.json()
    if not result["success"]:
        print(f"Error: {result['error']}")
        if result.get("stack_trace"):
            print("Stack trace:")
            print(result["stack_trace"])
    else:
        print("Import completed successfully:")
        print(json.dumps(result["data"], indent=2))

In [None]:
# Get the updated project data, then list the images attached to the project
response = requests.get(
    f"{API_BASE_URL}/projects/{project_name}",
    headers=headers
)

if response.status_code != 200:
    print(f"Error getting project: {response.status_code}")
    print(response.text)
else:
    project_data = response.json()
    print(f"Project: {project_data['name']}")
    print(f"ID: {project_data['id']}")

    # Get the images for this project. A separate variable keeps the project
    # response available instead of overwriting it.
    images_response = requests.get(
        f"{API_BASE_URL}/projects/{project_data['id']}/images",
        headers=headers
    )

    if images_response.status_code != 200:
        print(f"Error getting images: {images_response.status_code}")
        print(images_response.text)
    else:
        images = images_response.json()
        print(f"\nTotal images: {len(images)}")

        # Display the images (pandas is already imported as `pd` in the first cell)
        df = pd.DataFrame(images)
        print("\nImages:")
        display(df)