In [1]:
# Load (or reload) the autoreload extension; mode 2 re-imports all modules
# before each cell is executed, so edits to imported .py files are picked up.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from pathlib import Path
import pandas as pd
from tqdm import tqdm
import json
import numpy as np
import pickle

import matplotlib.pyplot as plt
import seaborn as sns
from openslide import OpenSlide

In [34]:
# Default font size for all matplotlib figures created in this notebook.
plt.rcParams["font.size"] = 25

# Requested edge length (in pixels) of the slide thumbnails; it also selects
# which QuadTree_<patch_size>.csv result file is loaded further down.
# NOTE(review): the thumbnails shown in the saved output below are 2048 px on
# their long side, so they appear to have been created with a different
# patch_size - confirm by re-running the notebook top to bottom.
patch_size = 1024

In [22]:
# Build {file name: PIL thumbnail} for every whole-slide image found three
# directory levels below the study root.
# NOTE(review): absolute, machine-specific path - consider moving it to a
# configurable data directory.
slide_root = Path(r"D:\Datasets\ScannerStudy")
slide_suffixes = [".svs", ".ndpi", ".tif"]

slide_list = {}
for path in tqdm(slide_root.glob("*/*/*.*")):
    if path.suffix not in slide_suffixes:
        continue
    # Only the in-memory thumbnail is kept, so close the slide right away
    # instead of leaving one open native handle per file.
    with OpenSlide(str(path)) as slide:
        slide_list[path.name] = slide.get_thumbnail(size=(patch_size, patch_size))

slide_list

76it [02:49,  2.23s/it]


{'A_CCMCT_183715A_1.svs': <PIL.Image.Image image mode=RGB size=2048x1761 at 0x185CA013048>,
 'A_CCMCT_22108_1.svs': <PIL.Image.Image image mode=RGB size=2048x1762 at 0x185CA0134A8>,
 'A_CCMCT_29609B_1.svs': <PIL.Image.Image image mode=RGB size=1696x2048 at 0x185CA003DD8>,
 'A_CCMCT_380609B_1.svs': <PIL.Image.Image image mode=RGB size=2048x1452 at 0x185CA0130B8>,
 'A_CCMCT_518711B_1.svs': <PIL.Image.Image image mode=RGB size=2048x1589 at 0x185CA0034A8>,
 'A_BB_563476_1.svs': <PIL.Image.Image image mode=RGB size=2048x2046 at 0x185CA012BA8>,
 'A_BB_563479_1.svs': <PIL.Image.Image image mode=RGB size=1984x2048 at 0x185CA012E10>,
 'A_BB_568320_1.svs': <PIL.Image.Image image mode=RGB size=2048x2005 at 0x185CA012EB8>,
 'A_BB_568381_1.svs': <PIL.Image.Image image mode=RGB size=2048x1861 at 0x185CA0680B8>,
 'A_BB_574162_1.svs': <PIL.Image.Image image mode=RGB size=2006x2048 at 0x185CA068128>,
 'Z_CCMCT_183715A_1.tif': <PIL.Image.Image image mode=RGB size=2048x1744 at 0x185CA068080>,
 'Z_CCMCT_2

In [23]:
# Grid cell of each label "L0" ... "L24" on a 5 x 5 grid, filled row by row.
# Values are (grid_x, grid_y): "L0" -> (0, 0), "L4" -> (4, 0), "L5" -> (0, 1),
# ..., "L24" -> (4, 4).
coordinates = {
    f"L{index}": (index % 5, index // 5)
    for index in range(25)
}

In [24]:
def add_help_fields(frame):
    """Add derived helper columns to an annotation table and return it.

    The new columns are written to ``frame`` itself (the input is modified in
    place) and the same object is returned.

    Parameters
    ----------
    frame : pandas.DataFrame
        Needs the columns ``image_name`` (file name whose third
        ``_``-separated token is used as the patient id) and ``vector`` (a
        string holding a dict with the keys ``x1``, ``y1``, ``x2``, ``y2``;
        single-quoted keys are accepted).

    Returns
    -------
    pandas.DataFrame
        ``frame`` with the additional columns ``image_name_stem``,
        ``patient_id``, ``x1``, ``y1``, ``x2``, ``y2``, ``center_x``,
        ``center_y``, ``center`` (2-element numpy array), ``anno_width`` and
        ``anno_height``.
    """
    frame["image_name_stem"] = [Path(image_name).stem for image_name in frame["image_name"]]

    frame["patient_id"] = [name.split("_")[2] for name in frame["image_name"]]

    # Decode every vector exactly once; single quotes are swapped for double
    # quotes so that dict-repr style strings are valid JSON.
    boxes = [json.loads(vector.replace("'", '"')) for vector in frame["vector"]]
    for key in ("x1", "y1", "x2", "y2"):
        frame[key] = [box[key] for box in boxes]

    # Centre of the box spanned by (x1, y1) and (x2, y2).
    frame["center_x"] = [x1 + ((x2 - x1) / 2) for x1, x2 in zip(frame["x1"], frame["x2"])]
    frame["center_y"] = [y1 + ((y2 - y1) / 2) for y1, y2 in zip(frame["y1"], frame["y2"])]

    # Same centre as one vector per row, convenient for distance computations.
    frame["center"] = [np.array((center_x, center_y))
                       for center_x, center_y in zip(frame["center_x"], frame["center_y"])]

    frame["anno_width"] = [x2 - x1 for x1, x2 in zip(frame["x1"], frame["x2"])]
    frame["anno_height"] = [y2 - y1 for y1, y2 in zip(frame["y1"], frame["y2"])]

    return frame

In [41]:
# Ground-truth annotations, extended with the derived helper columns.
gt_raw = pd.read_csv("GT.csv")
gt_annotations = add_help_fields(gt_raw)

# Predicted annotations: the QuadTree results for the configured patch size
# followed by the rows of GT_Registration.csv, stacked into a single table.
prediction_files = [f"QuadTree_{patch_size}.csv", "GT_Registration.csv"]
predicted_raw = pd.concat([pd.read_csv(csv_file) for csv_file in prediction_files])
predicted_annotations = add_help_fields(predicted_raw)

In [42]:
# For every image, method and ground-truth label: pair the ground-truth
# annotation with the method's prediction of the same label and record the
# Euclidean distance between their centres, plus both centres rescaled to the
# pixel grid of the image's thumbnail.
result_rows = []
# (image_name, method, type_name) combinations for which a method delivered
# no prediction; they are skipped instead of aborting the whole cell.
missing_predictions = []

for image_name in predicted_annotations["image_name"].unique():

    gt_annotations_image = gt_annotations[gt_annotations["image_name"] == image_name]
    predicted_annotations_image = predicted_annotations[predicted_annotations["image_name"] == image_name]

    for method in predicted_annotations_image["method"].unique():

        method_annos = predicted_annotations_image[predicted_annotations_image["method"] == method]

        for type_name in gt_annotations_image["type_name"].unique():

            # If a label occurs more than once, only its first row is used.
            gt_anno = gt_annotations_image[gt_annotations_image["type_name"] == type_name].iloc[0]

            candidates = method_annos[method_annos["type_name"] == type_name]
            if candidates.empty:
                # .iloc[0] would raise IndexError on an empty selection.
                missing_predictions.append((image_name, method, type_name))
                continue
            predicted_anno = candidates.iloc[0]

            distance = np.linalg.norm(gt_anno.center - predicted_anno.center)

            # Normalise by the annotated image size, then scale to the thumbnail.
            thumbnail = slide_list[image_name]
            x_start = gt_anno.center_x / gt_anno.image_width * thumbnail.width
            x_end = predicted_anno.center_x / predicted_anno.image_width * thumbnail.width
            y_start = gt_anno.center_y / gt_anno.image_height * thumbnail.height
            y_end = predicted_anno.center_y / predicted_anno.image_height * thumbnail.height

            grid_x, grid_y = coordinates[gt_anno.type_name]

            row = [gt_anno.scanner, image_name, gt_anno.image_type, gt_anno.annotation_type,
                   gt_anno.type_name, gt_anno.patient_id, distance, grid_x, grid_y,
                   x_start, x_end, y_start, y_end, method, predicted_anno.runtime]
            result_rows.append(row)

if missing_predictions:
    print(f"Skipped {len(missing_predictions)} ground-truth annotations without a matching prediction")

results = pd.DataFrame(result_rows, columns=["scanner", "image_name", "image_type", "annotation_type",
                                             "type_name", "patient_id", "distance", "grid_x", "grid_y",
                                             "x_start", "x_end", "y_start", "y_end", "method", "runtime"])
results.head()

Unnamed: 0,scanner,image_name,image_type,annotation_type,type_name,patient_id,distance,grid_x,grid_y,x_start,x_end,y_start,y_end,method,runtime
0,Aperio,A_CCMCT_183715A_1.svs,CCMCT,384,L0,183715A,1.0,0,0,409.129007,409.102547,251.034707,251.034707,QTree_L0,0.795544
1,Aperio,A_CCMCT_183715A_1.svs,CCMCT,385,L1,183715A,1.0,1,0,273.070195,273.043734,533.977914,533.977914,QTree_L0,0.795544
2,Aperio,A_CCMCT_183715A_1.svs,CCMCT,386,L2,183715A,1.414214,2,0,242.243957,242.217496,870.525286,870.498828,QTree_L0,0.795544
3,Aperio,A_CCMCT_183715A_1.svs,CCMCT,387,L3,183715A,1.414214,3,0,245.710255,245.683794,1176.301857,1176.275399,QTree_L0,0.795544
4,Aperio,A_CCMCT_183715A_1.svs,CCMCT,389,L5,183715A,1.414214,0,1,638.857246,638.830786,220.528486,220.502028,QTree_L0,0.795544


In [43]:
# Summarise distance and runtime for every (image type, method) combination
# that actually occurs in `results`.
summary_rows = []

for image_type in results["image_type"].unique():
    of_image_type = results["image_type"] == image_type

    for method in results["method"].unique():
        subset = results[of_image_type & (results["method"] == method)]

        # Combinations without any rows do not get a summary line.
        if subset.empty:
            continue

        distances = subset["distance"]
        summary_rows.append({
            "method": method,
            "image_type": image_type,
            # NOTE: despite its name, this column holds the mean distance.
            "sum_dist": distances.mean(),
            "std": distances.std(),
            "min": distances.min(),
            "max": distances.max(),
            "runtime": subset["runtime"].mean(),
        })

method_comparision = pd.DataFrame(summary_rows, columns=["method", "image_type",
                                                         "sum_dist", "std", "min", "max", "runtime"])
method_comparision

Unnamed: 0,method,image_type,sum_dist,std,min,max,runtime
0,QTree_L0,CCMCT,27.727352,29.860878,0.0,205.477493,0.967304
1,QTree_L1,CCMCT,11.847954,10.999273,0.0,60.671245,7.246006
2,QTree_L2,CCMCT,6.053237,6.865422,0.0,48.507731,89.527404
3,GT_Registration,CCMCT,8.294068,18.140527,0.0,118.595953,
4,QTree_L0,Cyto,14.185884,13.672438,0.0,96.881371,0.84661
5,QTree_L1,Cyto,7.149673,8.048399,0.0,75.960516,11.351669
6,QTree_L2,Cyto,6.83487,7.715609,0.0,75.960516,115.321775
7,GT_Registration,Cyto,4.738076,5.209896,0.0,59.665736,
