In [1]:
# Enable the autoreload extension in mode 2 so edited modules are re-imported
# before each cell is executed.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

### Import the required Python packages

In [3]:
import json
import os
import pickle
from pathlib import Path
from time import sleep

import numpy as np
import openslide
import pandas as pd
import pyvips
from tqdm import tqdm

In [4]:
from exact_sync.v1.api.annotations_api import AnnotationsApi
from exact_sync.v1.api.images_api import ImagesApi
from exact_sync.v1.api.image_sets_api import ImageSetsApi
from exact_sync.v1.api.annotation_types_api import AnnotationTypesApi
from exact_sync.v1.api.products_api import ProductsApi
from exact_sync.v1.api.teams_api import TeamsApi

from exact_sync.v1.models import ImageSet, Team, Product, AnnotationType, Image, Annotation, AnnotationMediaFile
from exact_sync.v1.rest import ApiException
from exact_sync.v1.configuration import Configuration
from exact_sync.v1.api_client import ApiClient

### Connect to EXACT

In [None]:
# Credentials may be supplied through the EXACT_USER / EXACT_PASSWORD
# environment variables so they do not have to be written into the notebook;
# the literals below are only the fallback values.
username = os.environ.get("EXACT_USER", 'marzahl')
password = os.environ.get("EXACT_PASSWORD", '****')
# Root folder that is searched recursively for the local *.svs slides.
wsi_paths = "D:\\Datasets\\EIPH WSI\\Katze"
# Base URL of the EXACT server the API client connects to.
host =  "https://exact.cs.fau.de/"

# Image set the annotations are read from, and image set that receives the
# generated cluster images.
source_dataset = "Felidae-EIPH"
target_dataset = "Felidae-EIPH-Cluster"

In [5]:
# Connection settings for the EXACT REST client, taken from the settings cell.
local_configuration = Configuration()
local_configuration.username = username
local_configuration.password = password
local_configuration.host = host

# A single client instance is shared by every endpoint wrapper below.
local_client = ApiClient(local_configuration)

# Endpoint wrappers used by the following cells.
local_images_api = ImagesApi(local_client)
local_image_sets_api = ImageSetsApi(local_client)
local_annotations_api = AnnotationsApi(local_client)
local_annotation_types_api = AnnotationTypesApi(local_client)
local_product_api = ProductsApi(local_client)

### Download annotations

In [None]:
# Look up the source image sets and the image set that will receive the
# generated images (the first match for target_dataset is used).
local_image_sets = local_image_sets_api.list_image_sets(name=source_dataset)
target_image_set = local_image_sets_api.list_image_sets(name=target_dataset).results[0]

# Images keyed by name and annotation types keyed by id, merged over all
# matching source image sets.
local_images = {}
local_annotation_types = {}

for source_set in local_image_sets.results:
    set_images = local_images_api.list_images(pagination=False, image_set=source_set.id).results
    local_images.update({img.name: img for img in set_images})

    for set_product in source_set.product_set:
        product_types = local_annotation_types_api.list_annotation_types(product=set_product).results
        local_annotation_types.update({a_type.id: a_type for a_type in product_types})

In [None]:
# Map each slide file name to its path for every .svs file found below wsi_paths.
files = dict((slide_file.name, slide_file) for slide_file in Path(wsi_paths).rglob("*.svs"))

In [None]:
# level is passed to read_region; down_factor scales the annotation offsets
# that are used as the read location.
level, down_factor = 0, 1

### Copy the cells to new images clustered by their width to height ratio.

 [***2.6  Semi-automatic data cleaning via customised clustering***](https://arxiv.org/abs/2108.08529)

In [None]:
# Handles of the asynchronous annotation uploads, collected over all images.
threads = []
for image in tqdm(local_images.values()):

    annotations = []

    # Initial canvas size in pixels; the width grows on demand further below.
    y_total_size = 5000
    x_total_size = 10000
    # Free space in pixels kept between neighbouring columns / stacked patches.
    x_border = 10
    y_border = 10

    # White RGB canvas onto which all annotated patches of this slide are copied.
    big_image = np.full((y_total_size, x_total_size, 3), 255, dtype=np.uint8)

    slide_path = files[image.name]
    # Exchange only the extension: a plain str.replace would also rewrite an
    # "svs" that occurs elsewhere in the image name.
    target_file = Path(image.name).with_suffix(".tiff")

    local_annos = local_annotations_api.list_annotations(image=image.id, pagination=False).results
    if not local_annos:
        # No annotations means no patches to arrange, so nothing is generated
        # or uploaded for this image.
        continue

    data = [[anno.unique_identifier, anno.vector['x1'], anno.vector['y1'], anno.vector['x2'], anno.vector['y2'], anno.annotation_type, anno.meta_data]  for anno in local_annos]

    df = pd.DataFrame(data=data, columns=["unique_identifier", "x_min", "y_min", "x_max", "y_max", "label", "meta_data"])
    df["width"] = df["x_max"] - df["x_min"]
    df["hight"] = df["y_max"] - df["y_min"]

    # Right-most canvas column occupied so far.
    x_max = 0

    # The context manager closes the slide handle once all patches are read.
    with openslide.open_slide(str(slide_path)) as slide:
        for width in tqdm(sorted(df["width"].unique())):
            # Patches of equal width share columns, ordered by height and label.
            df_width = df[df["width"] == width].sort_values(by=['hight', 'label'])

            # Every width group starts in a fresh column at the bottom of the canvas.
            x_start = x_max + x_border
            y_start = y_total_size

            for offset_x, offset_y, w, h, label, meta_data, unique_identifier in zip(df_width["x_min"], df_width["y_min"], df_width["width"], df_width["hight"], df_width["label"], df_width["meta_data"], df_width["unique_identifier"]):
                offset_x, offset_y, w, h = int(offset_x), int(offset_y), int(w), int(h)

                # If the top of the canvas is reached, start a new column.
                if y_start - h <= 0:
                    y_start = y_total_size
                    x_start = x_max + x_border

                # Grow the canvas to the right until the patch fits; one step
                # can be too small when x_start already lies past the old edge.
                while x_start + w >= x_total_size:
                    extra_width = max(1000, w)
                    z = np.full((y_total_size, extra_width, 3), 255, dtype=np.uint8)
                    big_image = np.concatenate((big_image, z), axis=1)

                    x_total_size += extra_width

                # read_region output is reduced to its first three channels.
                patch = np.array(slide.read_region(location=(int(offset_x * down_factor), int(offset_y * down_factor)),
                                                   level=level, size=(w, h)))[:, :, :3]

                # Patches are stacked bottom-up inside the current column.
                min_y = y_start - h
                max_y = y_start
                big_image[min_y:max_y, x_start:x_start+w] = patch

                # The new annotation box is inset by 5 px on every side of the pasted patch.
                vector = {"x1": x_start + 5, "y1": min_y + 5, "x2": x_start + w - 5, "y2": max_y - 5}

                annotation = Annotation(annotation_type=label, vector=vector, unique_identifier=unique_identifier, meta_data=meta_data)
                annotations.append(annotation)

                y_start -= (h + y_border)
                x_max = max(x_max, x_start+w)

    # Hand the flattened canvas to pyvips and store it as a tiled, pyramidal BigTIFF.
    canvas_height, canvas_width, bands = big_image.shape
    linear = big_image.reshape(canvas_width * canvas_height * bands)
    vi = pyvips.Image.new_from_memory(linear.data, canvas_width, canvas_height, bands, 'uchar')
    vi.tiffsave(str(target_file), tile=True, compression='lzw', bigtiff=True, pyramid=True)

    # Upload the generated image into the target image set.
    image_type = int(Image.ImageSourceTypes.SERVER_GENERATED)
    new_image = local_images_api.create_image(file_path=target_file, image_type=image_type, image_set=target_image_set.id).results[0]

    # Attach every relocated annotation to the new image; the uploads run
    # asynchronously and their handles are kept in threads.
    for anno in annotations:
        anno.image = new_image.id

        thread = local_annotations_api.create_annotation(body=anno, async_req=True)
        threads.append(thread)

### Wait until all annotations are in sync with the server

In [None]:
# Poll the asynchronous uploads until every request has finished.
# NOTE: ready() only reports completion; the results are never inspected here,
# so a failed upload is not detected by this cell.
with tqdm(total=len(threads), desc="uploading annotations") as progress:
    while len(threads) > 0:
        # Build the list of unfinished requests in one pass instead of calling
        # remove() while iterating, which skips the element after each removal.
        pending = [thread for thread in threads if not thread.ready()]
        progress.update(len(threads) - len(pending))
        # Slice assignment keeps the same list object.
        threads[:] = pending
        if threads:
            sleep(0.25)