In [1]:
# Notebook setup: (re)load the autoreload extension and enable mode 2 so that
# edited project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline, below the cell that creates them.
%matplotlib inline

In [2]:
from pathlib import Path
import pandas as pd
from tqdm import tqdm
import json
import numpy as np
import pickle

import matplotlib.pyplot as plt
import seaborn as sns
from openslide import OpenSlide

In [3]:
# Global matplotlib default: large font for all figures drawn in this notebook.
plt.rcParams["font.size"] = 25

# Edge length in pixels; bounds the slide thumbnails and selects the
# QuadTree_<patch_size>.csv result file loaded further down.
patch_size = 1024

In [4]:
def _load_thumbnail(slide_path, size):
    """Return a thumbnail of the slide at ``slide_path`` and release the slide handle.

    Args:
        slide_path: Path to a whole-slide image readable by OpenSlide.
        size: ``(max_width, max_height)`` bounding box passed to ``get_thumbnail``.

    Returns:
        The PIL image produced by ``OpenSlide.get_thumbnail``.
    """
    slide = OpenSlide(str(slide_path))
    try:
        return slide.get_thumbnail(size=size)
    finally:
        # The thumbnail is an in-memory image, so the slide itself can be closed
        # right away instead of keeping one open handle per file.
        slide.close()


# Map slide file name -> thumbnail for every supported slide file found two
# directory levels below the dataset root.
slide_list = {path.name: _load_thumbnail(path, (patch_size, patch_size))
              for path in tqdm(Path(r"D:\Datasets\ScannerStudy").glob("*/*/*.*"))
              if path.suffix in [".svs", ".ndpi", ".tif"]}

slide_list

40it [03:05,  4.63s/it]


{'A_CCMCT_183715A_1.svs': <PIL.Image.Image image mode=RGB size=1024x880 at 0x198FCF614A8>,
 'A_CCMCT_22108_1.svs': <PIL.Image.Image image mode=RGB size=1024x881 at 0x198FCF615C0>,
 'A_CCMCT_29609B_1.svs': <PIL.Image.Image image mode=RGB size=848x1024 at 0x198FCF612B0>,
 'A_CCMCT_380609B_1.svs': <PIL.Image.Image image mode=RGB size=1024x726 at 0x198FCF61518>,
 'A_CCMCT_518711B_1.svs': <PIL.Image.Image image mode=RGB size=1024x795 at 0x198FCF615F8>,
 'A_BB_563476_1.svs': <PIL.Image.Image image mode=RGB size=1024x1023 at 0x198FCF617F0>,
 'A_BB_563479_1.svs': <PIL.Image.Image image mode=RGB size=992x1024 at 0x198FCF617B8>,
 'A_BB_568320_1.svs': <PIL.Image.Image image mode=RGB size=1024x1003 at 0x198FCF61630>,
 'A_BB_568381_1.svs': <PIL.Image.Image image mode=RGB size=1024x931 at 0x198FCF61BE0>,
 'A_BB_574162_1.svs': <PIL.Image.Image image mode=RGB size=1003x1024 at 0x198FCF61940>,
 'Z_CCMCT_183715A_1.tif': <PIL.Image.Image image mode=RGB size=1024x872 at 0x198FCF61C50>,
 'Z_CCMCT_22108_1.t

In [5]:
# Landmark name -> (grid_x, grid_y) position on a 5 x 5 grid.
# Landmarks are numbered row by row: L0..L4 fill row 0 (x = 0..4), L5..L9 row 1, and so on.
coordinates = {
    f"L{landmark}": (landmark % 5, landmark // 5)
    for landmark in range(25)
}

In [6]:
def add_help_fields(frame):
    """Add derived helper columns to an annotation frame (modified in place).

    Expects the columns ``image_name`` (file name whose third ``_``-separated
    token is the patient id) and ``vector`` (a dict-like string with the keys
    ``x1``, ``y1``, ``x2`` and ``y2``, single quotes allowed).

    Added columns: ``image_name_stem``, ``patient_id``, ``x1``, ``y1``, ``x2``,
    ``y2``, ``center_x``, ``center_y``, ``center`` (numpy array ``[x, y]``),
    ``anno_width`` and ``anno_height``.

    Args:
        frame: pandas DataFrame with the columns described above.

    Returns:
        The same ``frame`` object, with the helper columns added.
    """
    frame["image_name_stem"] = [Path(image_name).stem for image_name in frame["image_name"]]

    frame["patient_id"] = [name.split("_")[2] for name in frame["image_name"]]

    # Parse every vector string exactly once (single quotes are swapped for
    # double quotes so the text is valid JSON) instead of once per coordinate.
    boxes = [json.loads(vector.replace("\'", "\"")) for vector in frame["vector"]]
    for key in ("x1", "y1", "x2", "y2"):
        frame[key] = [box[key] for box in boxes]

    frame["center_x"] = [x1 + ((x2 - x1) / 2) for x1, x2 in zip(frame["x1"], frame["x2"])]
    frame["center_y"] = [y1 + ((y2 - y1) / 2) for y1, y2 in zip(frame["y1"], frame["y2"])]

    frame["center"] = [np.array((center_x, center_y))
                       for center_x, center_y in zip(frame["center_x"], frame["center_y"])]

    frame["anno_width"] = [x2 - x1 for x1, x2 in zip(frame["x1"], frame["x2"])]
    frame["anno_height"] = [y2 - y1 for y1, y2 in zip(frame["y1"], frame["y2"])]

    return frame

In [7]:
# Ground-truth annotations, enriched with the derived helper columns.
gt_annotations = add_help_fields(pd.read_csv("GT.csv"))

# Annotations produced by the individual methods, stacked into a single frame
# (file order is kept; the original row index of each file is retained).
prediction_files = [f"QuadTree_{patch_size}.csv", "ReStain.csv", "GT_Registration.csv"]
predicted_annotations = add_help_fields(
    pd.concat([pd.read_csv(csv_file) for csv_file in prediction_files])
)

In [8]:
# Conversion factor from annotation pixel units to microns.
# NOTE(review): the same factor is applied to every scanner/image — confirm that
# all slides share this resolution.
MICRONS_PER_PIXEL = 0.25

# One row per (image, method, ground-truth landmark) with the error of the
# predicted landmark centre and its position on the slide thumbnail.
result_rows = []
# (image_name, method, type_name) combinations for which a method produced no
# annotation; they are skipped instead of aborting the loop with an IndexError.
missing_predictions = []

for image_name in predicted_annotations["image_name"].unique():

    gt_annotations_image = gt_annotations[gt_annotations["image_name"] == image_name]
    predicted_annotations_image = predicted_annotations[predicted_annotations["image_name"] == image_name]

    for method in predicted_annotations_image["method"].unique():

        method_annos = predicted_annotations_image[predicted_annotations_image["method"] == method]

        for type_name in gt_annotations_image["type_name"].unique():

            # First ground-truth annotation of this landmark type on the image
            gt_anno = gt_annotations_image[gt_annotations_image["type_name"] == type_name].iloc[0]

            matching_annos = method_annos[method_annos["type_name"] == type_name]
            if matching_annos.empty:
                missing_predictions.append((image_name, method, type_name))
                continue
            predicted_anno = matching_annos.iloc[0]

            # Euclidean distance between the two centres, converted from pixels to microns
            distance = np.linalg.norm(gt_anno.center - predicted_anno.center) * MICRONS_PER_PIXEL

            # Rescale both centres from full-image coordinates to thumbnail coordinates
            thumbnail = slide_list[image_name]
            x_start = gt_anno.center_x / gt_anno.image_width * thumbnail.width
            x_end = predicted_anno.center_x / predicted_anno.image_width * thumbnail.width
            y_start = gt_anno.center_y / gt_anno.image_height * thumbnail.height
            y_end = predicted_anno.center_y / predicted_anno.image_height * thumbnail.height

            grid_x, grid_y = coordinates[gt_anno.type_name]

            row = [gt_anno.scanner, image_name, gt_anno.image_type, gt_anno.annotation_type,
                   gt_anno.type_name, gt_anno.patient_id, distance, grid_x, grid_y,
                   x_start, x_end, y_start, y_end, method, predicted_anno.runtime]
            result_rows.append(row)

results = pd.DataFrame(result_rows, columns=["scanner", "image_name", "image_type", "annotation_type",
                                             "type_name", "patient_id", "distance", "grid_x", "grid_y",
                                             "x_start", "x_end", "y_start", "y_end", "method", "runtime"])
results.head()

Unnamed: 0,scanner,image_name,image_type,annotation_type,type_name,patient_id,distance,grid_x,grid_y,x_start,x_end,y_start,y_end,method,runtime
0,Aperio,A_CCMCT_183715A_1.svs,CCMCT,384,L0,183715A,0.25,0,0,204.564503,204.551273,125.446077,125.446077,QTree_L0,0.795544
1,Aperio,A_CCMCT_183715A_1.svs,CCMCT,385,L1,183715A,0.25,1,0,136.535097,136.521867,266.837345,266.837345,QTree_L0,0.795544
2,Aperio,A_CCMCT_183715A_1.svs,CCMCT,386,L2,183715A,0.353553,2,0,121.121978,121.108748,435.015475,435.002254,QTree_L0,0.795544
3,Aperio,A_CCMCT_183715A_1.svs,CCMCT,387,L3,183715A,0.353553,3,0,122.855127,122.841897,587.816942,587.80372,QTree_L0,0.795544
4,Aperio,A_CCMCT_183715A_1.svs,CCMCT,389,L5,183715A,0.353553,0,1,319.428623,319.415393,110.201629,110.188407,QTree_L0,0.795544


In [9]:
# Summary statistics of the landmark distance and the mean runtime for every
# (image_type, method) combination that has at least one result row.
comparison_rows = []

for image_type in results["image_type"].unique():
    for method in results["method"].unique():

        is_selected = (results["image_type"] == image_type) & (results["method"] == method)
        subset = results[is_selected]

        # Combinations without any rows are left out of the table
        if subset.empty:
            continue

        distances = subset["distance"]
        comparison_rows.append([
            method,
            image_type,
            distances.mean(),  # stored in the "sum_dist" column, although it is a mean
            distances.std(),
            distances.min(),
            distances.max(),
            subset["runtime"].mean(),
        ])

method_comparision = pd.DataFrame(comparison_rows, columns=["method", "image_type",
                                                            "sum_dist", "std", "min", "max", "runtime"])
method_comparision

Unnamed: 0,method,image_type,sum_dist,std,min,max,runtime
0,QTree_L0,CCMCT,6.931838,7.46522,0.0,51.369373,0.967304
1,QTree_L1,CCMCT,2.961988,2.749818,0.0,15.167811,7.246006
2,QTree_L2,CCMCT,1.513309,1.716356,0.0,12.126933,89.527404
3,ReStain,CCMCT,2292.41308,980.925787,214.780564,5543.165895,21.179446
4,GT_Registration,CCMCT,2.073517,4.535132,0.0,29.648988,
5,QTree_L0,Cyto,3.546471,3.41811,0.0,24.220343,0.84661
6,QTree_L1,Cyto,1.787418,2.0121,0.0,18.990129,11.351669
7,QTree_L2,Cyto,1.708717,1.928902,0.0,18.990129,115.321775
8,GT_Registration,Cyto,1.184519,1.302474,0.0,14.916434,
