In [None]:
from random import sample
import json
import base64
import gzip
from io import BytesIO
import pandas as pd
import numpy as np
from PIL import Image
from tqdm.notebook import tqdm
from sqlalchemy import select, func
from eyened_orm import (
    ImageInstance,
    Modality,
    Feature,
    Annotation,
    AnnotationData,
    AnnotationType,
    Segmentation,
    Creator
)
from eyened_orm.Segmentation import Datatype, DataRepresentation
from eyened_orm.db import Database

In [None]:
# Environment file handed to `Database` (presumably holds the connection
# settings -- TODO confirm). Kept as a named constant so it is easy to retarget.
ENV_FILE = '../dev/.env'

# NOTE: running this cell echoes the full connection string, including the
# password, into the cell output. Clear the output before sharing or
# committing the notebook.
database = Database(ENV_FILE)
session = database.create_session()

creating engine with connection string mysql+pymysql://root:<REDACTED>@eyened-gpu:22114/eyened_database


In [None]:
def get_annotations_with_annotation_type(annotation_type_ids, where=None, limit=5):
    """Fetch a random sample of active annotations of the given types.

    Only annotations that are not flagged `Inactive`, whose creator is marked
    `IsHuman`, and whose `CreatorID` is not 1 are returned.

    Args:
        annotation_type_ids: collection of `AnnotationTypeID` values to match.
        where: optional extra SQLAlchemy criterion, ANDed onto the query.
        limit: maximum number of rows to return. Defaults to 5 (the sample
            size that used to be hard-coded); pass ``None`` for every match.

    Returns:
        List of ``(Annotation, ImageInstance)`` rows in random order. Because
        the image join is an outer join, ``ImageInstance`` may be ``None``.
    """
    query = (
        select(Annotation, ImageInstance)
        .join_from(Annotation, ImageInstance, isouter=True)
        .join_from(Annotation, Creator)
        # Multiple criteria passed to where() are combined with AND.
        .where(
            ~Annotation.Inactive,
            Annotation.AnnotationTypeID.in_(annotation_type_ids),
            # CreatorID 1 is excluded -- presumably a system/import account;
            # TODO confirm.
            Annotation.CreatorID != 1,
            Creator.IsHuman,
        )
    )

    if where is not None:
        query = query.where(where)

    query = query.order_by(func.random())
    if limit is not None:
        query = query.limit(limit)

    return session.execute(query).all()

In [None]:
def open_data(dpath, db_res=None):
    """Load an annotation file from disk as a 3-D array in DHW order.

    Two storage formats are handled:

    * ``.gz``: a gzip-compressed raw ``uint8`` buffer, reshaped to ``db_res``
      (height, width, depth) and transposed to (depth, height, width).
    * anything else: an image readable by PIL, converted to 8-bit greyscale
      (mode ``"L"``) when needed and given a leading depth axis of size 1.

    Args:
        dpath: path object of the file to read (must expose ``.suffix``).
        db_res: ``(height, width, depth)`` shape of the raw buffer. Required
            for ``.gz`` files, ignored otherwise.

    Returns:
        ``np.ndarray`` of shape (depth, height, width).

    Raises:
        AssertionError: if ``dpath`` is a ``.gz`` file and ``db_res`` is None.
        RuntimeError: if the loaded array does not end up 3-dimensional.
    """
    if dpath.suffix == ".gz":
        assert db_res is not None, "db_res is required for .gz files"
        with gzip.open(dpath, 'rb') as f:
            raw = f.read()
        im = np.frombuffer(raw, dtype=np.uint8).reshape(db_res)  # HWD
        # transpose to DHW
        im = im.transpose(2, 0, 1)
    else:
        # Image.open is lazy and keeps the file handle open; the context
        # manager guarantees it is released once the pixels are copied out.
        with Image.open(dpath) as src:
            if src.mode != "L":
                print(f"Found mode {src.mode} for {dpath}")
                grey = src.convert('L')
            else:
                grey = src
            im = np.array(grey)

    if im.ndim == 2:
        # Single 2-D image: add the depth axis so callers always get DHW.
        im = im[None, ...].astype(np.uint8)

    if im.ndim != 3:
        raise RuntimeError(f'Found shape {im.shape} for {dpath}')

    return im  # DHW


def convert_one_annotation_basic(annot, annot_data, image_instance):
    """Turn one annotation file into a persisted binary `Segmentation`.

    The file behind `annot_data.path` is loaded with `open_data`, a
    `Segmentation` row describing it is added to the session and flushed,
    and the pixel data is then written through `Segmentation.write_data`.
    The session is not committed here.

    Args:
        annot: annotation supplying `CreatorID` and `FeatureID`.
        annot_data: annotation-data record whose `path` points at the file.
        image_instance: image the segmentation belongs to; its `Rows_y`,
            `Columns_x` and `NrOfFrames` give the raw-buffer shape.

    Returns:
        The newly created `Segmentation`.

    Raises:
        RuntimeError: if the file cannot be opened or is not 3-dimensional.
    """
    # Shape handed to open_data for raw buffers: (rows, columns, frames).
    hwd_shape = (
        image_instance.Rows_y,
        image_instance.Columns_x,
        image_instance.NrOfFrames,
    )

    try:
        volume = open_data(annot_data.path, hwd_shape)
    except Exception as err:
        raise RuntimeError(f'Error opening {annot_data.path}: {err}') from err

    if volume.ndim != 3:
        raise RuntimeError(f'Found shape {volume.shape} for {annot_data.path}')

    n_slices, n_rows, n_cols = volume.shape

    segmentation = Segmentation(
        Depth=n_slices,
        Height=n_rows,
        Width=n_cols,
        SparseAxis=0,
        ScanIndices=None,
        ImageProjectionMatrix=None,
        DataRepresentation=DataRepresentation.Binary,
        DataType=Datatype.R8UI,
        ImageInstanceID=image_instance.ImageInstanceID,
        CreatorID=annot.CreatorID,
        FeatureID=annot.FeatureID,
    )

    # Flush before writing the data, as the original code does (presumably so
    # the row has its ID by then -- TODO confirm).
    session.add(segmentation)
    session.flush([segmentation])
    segmentation.write_data(volume)

    return segmentation

In [None]:
# artery-vein annotations
def _decode_branch_drawing(data_url, height, width):
    """Decode one branch's base64 image into a ``(1, height, width)`` array.

    Args:
        data_url: the branch's ``drawing`` string, a base64 payload optionally
            preceded by a ``data:<mime>;base64,`` header.
        height: expected number of rows.
        width: expected number of columns.

    Returns:
        ``np.ndarray`` of shape ``(1, height, width)``. For multi-channel
        images only the first channel is kept.

    Raises:
        RuntimeError: if the decoded image is not ``height`` x ``width``.
    """
    # Drop the data-URL header if present. The base64 alphabet contains no
    # comma, so everything after the first comma is the payload.
    header, sep, payload = data_url.partition(',')
    if not sep:
        payload = header

    with Image.open(BytesIO(base64.b64decode(payload))) as img:
        pixels = np.array(img)

    if pixels.ndim == 3:
        # Keep the first channel only.
        pixels = pixels[:, :, 0]

    # A blind reshape would silently scramble a drawing whose dimensions
    # differ from the image but whose pixel count happens to match.
    if pixels.shape != (height, width):
        raise RuntimeError(
            f"Found drawing shape {pixels.shape}, expected {(height, width)}"
        )

    return pixels[None, ...]


def convert_av_annotations():
    """Convert artery/vein annotations into `Segmentation` rows.

    For each annotation returned by `get_annotations_with_annotation_type([9])`
    the JSON file is read, the vessel mask it references through ``maskID`` is
    converted with `convert_one_annotation_basic`, and every entry in
    ``branches`` becomes its own `Segmentation` that points at the mask via
    `ReferenceSegmentationID`. Annotations with missing or ambiguous data are
    reported with `print` and skipped. The session is committed at the end.

    Returns:
        ``(annotations, segmentations)``: two parallel lists with one entry
        per converted branch, so an annotation appears once per branch. The
        mask segmentations themselves are not included.

    Raises:
        RuntimeError: if a JSON file has no ``maskID`` key, or a drawing does
            not match the image size.
    """
    feature_map = {
        'Artery': Feature.by_name(session, 'Arteries').FeatureID,
        'Vein': Feature.by_name(session, 'Veins').FeatureID,
        'Vessel': Feature.by_name(session, 'Unknown Vessel').FeatureID,
    }

    annotations = []
    segmentations = []
    # AnnotationTypeID 9 -- presumably the artery/vein type; TODO confirm.
    for annot, image_instance in get_annotations_with_annotation_type([9]):
        if len(annot.AnnotationData) == 0:
            print('Found no annotation data for ', annot.AnnotationID)
            continue

        if len(annot.AnnotationData) > 1:
            print('Found multiple annotation data for ', annot.AnnotationID)
            continue
        annot_data = annot.AnnotationData[0]

        assert annot_data.path.suffix == ".json", annot_data.path

        if image_instance is None:
            print(f"Found image_instance is None for {annot_data.path}, annot_id: {annot.AnnotationID}")
            continue

        with open(annot_data.path, 'r') as f:
            data = json.load(f)

        if 'maskID' not in data:
            raise RuntimeError(f"Found maskID not in data for {annot_data.path}, keys are {data.keys()}")

        mask_annot = Annotation.by_id(session, data['maskID'])
        if mask_annot is None:
            print(f"Found mask is None for {annot_data.path}, annot_id: {annot.AnnotationID}")
            continue

        assert mask_annot.ImageInstance is not None

        # The mask annotation gets the same "has data" guard as the main
        # annotation instead of failing with an IndexError.
        if len(mask_annot.AnnotationData) == 0:
            print(f"Found no annotation data for mask {mask_annot.AnnotationID}, annot_id: {annot.AnnotationID}")
            continue

        # NOTE(review): the mask is converted (and later committed) before the
        # 'branches' check below, so a file without branches still leaves a
        # mask segmentation behind -- confirm this is intended.
        vessels_segmentation = convert_one_annotation_basic(mask_annot, mask_annot.AnnotationData[0], mask_annot.ImageInstance)

        width, height = image_instance.Columns_x, image_instance.Rows_y
        if 'branches' not in data:
            print(f"Found branches not in data for {annot_data.path}, keys are {data.keys()}")
            continue

        for branch in data['branches']:
            print(branch.keys())
            print(branch['vesselType'])
            drawing = _decode_branch_drawing(branch['drawing'], height, width)

            feature_id = feature_map[branch['vesselType']]

            # put in Zarr
            segmentation = Segmentation(
                Depth=1,
                Height=height,
                Width=width,
                SparseAxis=0,
                ScanIndices=None,
                ImageProjectionMatrix=None,
                DataRepresentation=DataRepresentation.Binary,
                DataType=Datatype.R8UI,
                ImageInstanceID=image_instance.ImageInstanceID,
                CreatorID=annot.CreatorID,
                FeatureID=feature_id,
                ReferenceSegmentationID=vessels_segmentation.SegmentationID
            )

            session.add(segmentation)
            session.flush([segmentation])

            segmentation.write_data(drawing)

            annotations.append(annot)
            segmentations.append(segmentation)

    session.commit()
    return annotations, segmentations

In [6]:
# Run the artery/vein conversion. This writes segmentation data and commits
# the session, so re-running the cell creates additional segmentations.
annotations, segmentations = convert_av_annotations()

Found mode RGBA for /mnt/oogergo/eyened/eyened_platform/annotations/1689254/399090_0.png
array_name: uint8_1_1024_1024.zarr
dict_keys(['id', 'vesselType', 'drawing', 'color'])
Artery
array_name: uint8_1_1024_1024.zarr
dict_keys(['id', 'vesselType', 'drawing', 'color'])
Vein
array_name: uint8_1_1024_1024.zarr
dict_keys(['id', 'vesselType', 'drawing', 'color'])
Vessel
array_name: uint8_1_1024_1024.zarr
Found mode RGBA for /mnt/oogergo/eyened/eyened_platform/annotations/381691/80654_0.png
array_name: uint8_1_1934_1960.zarr
Found mode RGBA for /mnt/oogergo/eyened/eyened_platform/annotations/1689254/399082_0.png
array_name: uint8_1_1024_1024.zarr
dict_keys(['id', 'vesselType', 'drawing', 'color'])
Artery
array_name: uint8_1_1024_1024.zarr
dict_keys(['id', 'vesselType', 'drawing', 'color'])
Vein
array_name: uint8_1_1024_1024.zarr
dict_keys(['id', 'vesselType', 'drawing', 'color'])
Vessel
array_name: uint8_1_1024_1024.zarr
Found mode RGB for /mnt/oogergo/eyened/eyened_platform/annotations/168

In [7]:
# List each converted annotation next to the segmentation created from it:
# AnnotationID, SegmentationID, ImageInstanceID.
for annot, seg in zip(annotations, segmentations):
    id_triplet = (annot.AnnotationID, seg.SegmentationID, seg.ImageInstanceID)
    print(*id_triplet)


399390 41 2230720
399390 42 2230720
399390 43 2230720
399382 46 2230712
399382 47 2230712
399382 48 2230712
399133 50 2230463
399133 51 2230463
399133 52 2230463
107277 54 708115
107277 55 708115
107277 56 708115
