In [1]:
# Load (or reload) IPython's autoreload extension so edits to imported
# modules are picked up without restarting the kernel.
%reload_ext autoreload
# Mode 2: reload all imported modules before each cell is executed.
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [55]:
from pathlib import Path
import pandas as pd
from openslide import OpenSlide
from tqdm import tqdm
import json
import numpy as np
import math
import os

In [20]:
# Maps annotation type names "L0" .. "L24" to the 1-based landmark number
# (1 .. 25) that is written to the first column of the landmark CSV files.
coordinates = {f"L{index}": index + 1 for index in range(25)}

In [17]:
# Index the whole-slide images found three directory levels below the dataset
# root by file name, keeping only the .svs / .ndpi / .tif files.
# NOTE(review): files sharing a name in different folders overwrite each
# other here - confirm that names are unique across the dataset.
slide_list = {
    slide_path.name: slide_path
    for slide_path in tqdm(Path(r"D:\Datasets\ScannerStudy").glob("*/*/*.*"))
    if slide_path.suffix in (".svs", ".ndpi", ".tif")
}

45it [00:00, 3461.47it/s]


In [39]:
def add_help_fields(frame):
    """Add derived helper columns to an annotation table, in place.

    Expects the columns ``image_name`` (file name whose third
    underscore-separated token is the patient id, e.g.
    ``N2_CCMCT_380609B_1.ndpi``) and ``vector`` (string form of a dict with
    the keys ``x1``, ``y1``, ``x2`` and ``y2``, written with single quotes).

    Columns added, in this order: ``image_name_stem``, ``patient_id``,
    ``x1``, ``y1``, ``x2``, ``y2``, ``center_x``, ``center_y``, ``center``
    (a 2-element numpy array per row), ``anno_width``, ``anno_height``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Annotation table; it is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``frame`` object, with the helper columns added.

    Raises
    ------
    IndexError
        If an ``image_name`` has fewer than three ``_``-separated tokens.
    KeyError
        If a parsed ``vector`` lacks one of the coordinate keys.
    json.JSONDecodeError
        If a ``vector`` string is not valid JSON after quote replacement.
    """
    frame["image_name_stem"] = [Path(image_name).stem for image_name in frame["image_name"]]

    frame["patient_id"] = [name.split("_")[2] for name in frame["image_name"]]

    # Parse every vector string exactly once; the stored strings use single
    # quotes, so they are swapped for double quotes to make them valid JSON.
    boxes = [json.loads(vector.replace("'", '"')) for vector in frame["vector"]]
    for key in ("x1", "y1", "x2", "y2"):
        frame[key] = [box[key] for box in boxes]

    frame["center_x"] = [x1 + ((x2 - x1) / 2) for x1, x2 in zip(frame["x1"], frame["x2"])]
    frame["center_y"] = [y1 + ((y2 - y1) / 2) for y1, y2 in zip(frame["y1"], frame["y2"])]

    frame["center"] = [np.array((center_x, center_y))
                       for center_x, center_y in zip(frame["center_x"], frame["center_y"])]

    frame["anno_width"] = [x2 - x1 for x1, x2 in zip(frame["x1"], frame["x2"])]
    frame["anno_height"] = [y2 - y1 for y1, y2 in zip(frame["y1"], frame["y2"])]

    return frame

In [40]:
# Ground-truth annotation exports to combine. Only the 4-scanner study is
# loaded at the moment; "../Validation/HE_IHC/GT_D240.csv" was listed in the
# original cell as a commented-out second source.
gt_sources = ["../Validation/4Scanner/GT.csv"]
gt_frames = [pd.read_csv(gt_source) for gt_source in gt_sources]
gt_annotations = add_help_fields(pd.concat(gt_frames))
gt_annotations.head()

Unnamed: 0,scanner,image_id,image_name,image_type,image_width,image_height,id,vector,unique_identifier,annotation_type,...,patient_id,x1,y1,x2,y2,center_x,center_y,center,anno_width,anno_height
0,2.0HT,10410,N2_CCMCT_380609B_1.ndpi,CCMCT,147456,107008,2755406,"{'x1': 20354, 'x2': 20408, 'y1': 16752, 'y2': ...",95183f6e-0a00-4de4-9c97-fedfec655a27,384,...,380609B,20354,16752,20408,16806,20381.0,16779.0,"[20381.0, 16779.0]",54,54
1,2.0HT,10410,N2_CCMCT_380609B_1.ndpi,CCMCT,147456,107008,2755407,"{'x1': 20256, 'x2': 20310, 'y1': 35448, 'y2': ...",cc3def2b-5631-4619-ab08-51f2a69c667d,385,...,380609B,20256,35448,20310,35502,20283.0,35475.0,"[20283.0, 35475.0]",54,54
2,2.0HT,10410,N2_CCMCT_380609B_1.ndpi,CCMCT,147456,107008,2755408,"{'x1': 20445, 'x2': 20499, 'y1': 54139, 'y2': ...",2d3de917-e55a-470f-8af2-10eb49b69687,386,...,380609B,20445,54139,20499,54193,20472.0,54166.0,"[20472.0, 54166.0]",54,54
3,2.0HT,10410,N2_CCMCT_380609B_1.ndpi,CCMCT,147456,107008,2755409,"{'x1': 26495, 'x2': 26549, 'y1': 71854, 'y2': ...",611f9d35-6cdb-42ef-bc38-2f44c9aaa257,387,...,380609B,26495,71854,26549,71908,26522.0,71881.0,"[26522.0, 71881.0]",54,54
4,2.0HT,10410,N2_CCMCT_380609B_1.ndpi,CCMCT,147456,107008,2755410,"{'x1': 35443, 'x2': 35497, 'y1': 90091, 'y2': ...",bf3ef0d3-5153-4693-96ba-96047cb3ae24,388,...,380609B,35443,90091,35497,90145,35470.0,90118.0,"[35470.0, 90118.0]",54,54


In [41]:
# Write one landmark CSV per image into the "Landmarks" folder. Each row holds
# the landmark number from `coordinates` and the integer-truncated centre
# (X, Y) of the first annotation of that type on the image; types without an
# annotation on the image are left out.
landmark_dir = Path("Landmarks")
# DataFrame.to_csv does not create missing folders, so make sure the output
# directory exists before the first file is written.
landmark_dir.mkdir(parents=True, exist_ok=True)

for image in gt_annotations["image_name"].unique():

    # Filter by image once instead of re-scanning the full table per type.
    image_annos = gt_annotations[gt_annotations["image_name"] == image]

    landmark_rows = []

    for type_name, value in coordinates.items():

        image_type_annos = image_annos[image_annos["type_name"] == type_name]

        if image_type_annos.empty:
            continue

        # Only the first matching annotation is exported for each type.
        x, y = image_type_annos.iloc[0].center

        landmark_rows.append([value, int(x), int(y)])

    # The empty name for the first column gives the CSV header ",X,Y".
    landmarks = pd.DataFrame(landmark_rows, columns=["", "X", "Y"])
    landmarks.to_csv(landmark_dir / f"{Path(image).stem}.csv", index=False)

In [49]:
# Index the landmark CSV files in the "Landmarks" folder by file name; the
# stored paths are relative to the current working directory.
landmark_list = {
    landmark_path.name: landmark_path
    for landmark_path in tqdm(Path(r"Landmarks").glob("*.csv"))
}

60it [00:00, 30012.91it/s]


In [68]:
# For every image type, write a table that pairs each patient's "Aperio"
# slide (source) with the same patient's slide from each of the other three
# scanners (target), together with the matching landmark files and the target
# image size. One CSV is written per image type.
pair_columns = ["Target image", "Source image", "Target landmarks",
                "Source landmarks", "Image size [pixels]",
                "Image diagonal [pixels]"]
working_dir = Path(os.getcwd())

for image_type in tqdm(gt_annotations["image_type"].unique()):

    image_type_gt_annotations = gt_annotations[gt_annotations["image_type"] == image_type]
    pair_rows = []

    for patient_id in tqdm(image_type_gt_annotations["patient_id"].unique()):

        patient_annos = image_type_gt_annotations[image_type_gt_annotations["patient_id"] == patient_id]

        # Any annotation row of the slide will do: only image-level fields
        # (name, width, height) are read from it.
        source_anno = patient_annos[patient_annos["scanner"] == "Aperio"].iloc[0]

        for scanner in ['2.0HT', 'Axio', 'S210']:

            target_anno = patient_annos[patient_annos["scanner"] == scanner].iloc[0]

            target_image = str(slide_list[target_anno.image_name])
            source_image = str(slide_list[source_anno.image_name])

            # Landmark paths are stored relative to the working directory,
            # so prefix it to obtain absolute paths.
            target_landmarks = str(working_dir / landmark_list[f"{Path(target_anno.image_name).stem}.csv"])
            source_landmarks = str(working_dir / landmark_list[f"{Path(source_anno.image_name).stem}.csv"])

            width = target_anno.image_width
            height = target_anno.image_height
            diagonal = math.sqrt((width * width) + (height * height))

            pair_rows.append([target_image, source_image,
                              target_landmarks, source_landmarks,
                              (width, height), diagonal])

    pairs = pd.DataFrame(pair_rows, columns=pair_columns)
    pairs.to_csv(f"pairs-imgs-lnds_{image_type}.csv", index=False)
    

  0%|                                                                                            | 0/2 [00:00<?, ?it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 217.44it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 217.39it/s][A
100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 20.83it/s]
