In [1]:
# (Re)load the IPython autoreload extension.
%reload_ext autoreload
# Mode 2: reload all imported modules before each cell is executed.
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from pathlib import Path
import pandas as pd
from tqdm import tqdm
import json
import numpy as np
import pickle

import matplotlib.pyplot as plt
import seaborn as sns
from openslide import OpenSlide

In [3]:
# Use a large default font for every matplotlib figure in this notebook.
plt.rcParams["font.size"] = 25

# Edge length (in pixels) of the bounding box requested for slide thumbnails;
# also selects which QuadTree_<patch_size>.csv result file is loaded.
patch_size = 2048

In [10]:
# Root folder that is searched (three directory levels deep) for slides.
# NOTE(review): machine-specific absolute path - adjust when running elsewhere.
slide_dir = Path(r"D:\Datasets\ColonCancer")


def _load_thumbnail(path, max_size):
    """Return a thumbnail of the slide at ``path`` and close the slide handle.

    Args:
        path: Location of the slide file to open with OpenSlide.
        max_size: Maximum width and height of the thumbnail in pixels.

    Returns:
        The image produced by ``OpenSlide.get_thumbnail``.
    """
    slide = OpenSlide(str(path))
    try:
        return slide.get_thumbnail(size=(max_size, max_size))
    finally:
        # The original comprehension never closed the slide, leaking one
        # open file handle per image.
        slide.close()


# Materialise the matching paths first so tqdm knows the total and the
# progress bar only counts the .tif files that are actually loaded.
slide_paths = [path for path in slide_dir.glob("*/*/*.*") if path.suffix in [".tif"]]

# Maps file name -> thumbnail image (a dict, despite the historical name).
slide_list = {path.name: _load_thumbnail(path, patch_size) for path in tqdm(slide_paths)}

slide_list


0it [00:00, ?it/s][A
1it [00:02,  2.96s/it][A
2it [00:05,  2.94s/it][A
3it [00:09,  3.15s/it][A
4it [00:12,  3.19s/it][A
5it [00:13,  2.34s/it][A
6it [00:14,  2.17s/it][A
7it [00:18,  2.47s/it][A
8it [00:19,  2.31s/it][A
9it [00:22,  2.22s/it][A
10it [00:22,  1.67s/it][A
11it [00:23,  1.44s/it][A
12it [00:24,  1.27s/it][A
13it [00:25,  1.14s/it][A
14it [00:25,  1.03it/s][A
15it [00:26,  1.04it/s][A
16it [00:27,  1.15it/s][A
17it [00:27,  1.25it/s][A
18it [00:28,  1.38it/s][A
19it [00:29,  1.41it/s][A
20it [00:29,  1.48s/it][A


{'CRC-A1-1 HE.tif': <PIL.Image.Image image mode=RGB size=1966x2048 at 0x2655A1B7780>,
 'CRC-A1-10 HE.tif': <PIL.Image.Image image mode=RGB size=2048x1871 at 0x2655A1B78D0>,
 'CRC-A1-13 HE.tif': <PIL.Image.Image image mode=RGB size=2048x1927 at 0x2655A1B7470>,
 'CRC-A1-2 HE.tif': <PIL.Image.Image image mode=RGB size=1675x2048 at 0x2655A1B7898>,
 'CRC-A1-5 HE.tif': <PIL.Image.Image image mode=RGB size=1482x2048 at 0x2655A1B7AC8>,
 'CRC-A1-1.tif': <PIL.Image.Image image mode=RGB size=2048x2007 at 0x2655A1B7BA8>,
 'CRC-A1-10.tif': <PIL.Image.Image image mode=RGB size=2048x1967 at 0x2655A1B7A58>,
 'CRC-A1-13.tif': <PIL.Image.Image image mode=RGB size=1628x2048 at 0x2655A1B7C18>,
 'CRC-A1-2.tif': <PIL.Image.Image image mode=RGB size=1758x2048 at 0x2655A1B7A20>,
 'CRC-A1-5.tif': <PIL.Image.Image image mode=RGB size=1588x2048 at 0x2655A1B7DD8>,
 'CRC-AI-11 F X40.tif': <PIL.Image.Image image mode=RGB size=1628x2048 at 0x2655A1B7E80>,
 'CRC-AI-14 1G X40.tif': <PIL.Image.Image image mode=RGB size

In [11]:
# Landmark name -> (grid_x, grid_y) cell. The 25 landmarks L0..L24 are laid
# out in five groups of five: the first tuple element cycles 0..4 within a
# group and the second element is the group number.
grid_side = 5

coordinates = {
    f"L{landmark}": (landmark % grid_side, landmark // grid_side)
    for landmark in range(grid_side * grid_side)
}

In [12]:
def add_help_fields(frame: pd.DataFrame) -> pd.DataFrame:
    """Add derived helper columns to an annotation frame.

    The frame is modified in place and also returned, so the call can be
    wrapped directly around ``pd.read_csv``.

    Args:
        frame: Annotation table with at least an ``image_name`` column and a
            ``vector`` column. Each ``vector`` is a string holding a
            single-quoted dict with the keys ``x1``, ``x2``, ``y1``, ``y2``.

    Returns:
        The same ``frame`` object with these columns added:
        ``image_name_stem``, ``patient_id``, ``x1``, ``y1``, ``x2``, ``y2``,
        ``center_x``, ``center_y``, ``center`` (2-element numpy array),
        ``anno_width`` and ``anno_height``.
    """
    frame["image_name_stem"] = [Path(image_name).stem for image_name in frame["image_name"]]

    # Third dash-separated token of the file name, with the magnification tag
    # ("40X" / "X40"), the " HE" stain suffix and the extension stripped.
    frame["patient_id"] = [
        name.split("-")[2].replace("40X", "X40").split(" X40")[0].replace(" HE", "").replace(".tif", "")
        for name in frame["image_name"]
    ]

    # The vector strings use single quotes, which JSON does not accept; swap
    # the quotes and parse each string once (previously it was re-parsed for
    # every one of the four coordinates).
    boxes = [json.loads(vector.replace("'", '"')) for vector in frame["vector"]]

    frame["x1"] = [box["x1"] for box in boxes]
    frame["y1"] = [box["y1"] for box in boxes]

    frame["x2"] = [box["x2"] for box in boxes]
    frame["y2"] = [box["y2"] for box in boxes]

    # Plain lists are assigned (not Series) so that a non-unique index, e.g.
    # after pd.concat, cannot trigger index alignment.
    frame["center_x"] = [x1 + ((x2 - x1) / 2) for x1, x2 in zip(frame["x1"], frame["x2"])]
    frame["center_y"] = [y1 + ((y2 - y1) / 2) for y1, y2 in zip(frame["y1"], frame["y2"])]

    frame["center"] = [
        np.array((center_x, center_y))
        for center_x, center_y in zip(frame["center_x"], frame["center_y"])
    ]

    frame["anno_width"] = [x2 - x1 for x1, x2 in zip(frame["x1"], frame["x2"])]
    frame["anno_height"] = [y2 - y1 for y1, y2 in zip(frame["y1"], frame["y2"])]

    return frame

In [13]:
# Ground-truth landmark annotations, enriched with the derived helper columns.
gt_annotations = add_help_fields(pd.read_csv("GT_D240.csv"))

# Result files whose rows are stacked into a single prediction table; the
# QuadTree file is selected by the configured patch size.
prediction_files = [
    f"QuadTree_{patch_size}.csv",
    "ReStain.csv",
    "GT_Registration_D240.csv",
]
predicted_raw = pd.concat([pd.read_csv(csv_name) for csv_name in prediction_files])

predicted_annotations = add_help_fields(predicted_raw)
predicted_annotations.head()

Unnamed: 0,scanner,image_id,image_name,image_type,image_width,image_height,vector,unique_identifier,annotation_type,type_name,...,patient_id,x1,y1,x2,y2,center_x,center_y,center,anno_width,anno_height
0,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 12011, 'x2': 12213, 'y1': 20314, 'y2': ...",d68e7b9a-d3ae-43ea-9c7c-4dcac427c588,21,L0,...,1,12011.0,20314.0,12213.0,20512.0,12112.0,20413.0,"[12112.0, 20413.0]",202.0,198.0
1,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 11780, 'x2': 11982, 'y1': 45560, 'y2': ...",0ca22054-a283-4010-bde9-ecb8482c1bd3,22,L1,...,1,11780.0,45560.0,11982.0,45758.0,11881.0,45659.0,"[11881.0, 45659.0]",202.0,198.0
2,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 11609, 'x2': 11811, 'y1': 104031, 'y2':...",2a73c263-0dae-4151-82e0-c661ad26930c,24,L3,...,1,11609.0,104031.0,11811.0,104229.0,11710.0,104130.0,"[11710.0, 104130.0]",202.0,198.0
3,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 14464, 'x2': 14666, 'y1': 128876, 'y2':...",0b4c38dd-b8a5-49bb-b0be-735b43e47cac,25,L4,...,1,14464.0,128876.0,14666.0,129074.0,14565.0,128975.0,"[14565.0, 128975.0]",202.0,198.0
4,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 39151, 'x2': 39353, 'y1': 18292, 'y2': ...",325de832-a522-4d95-ab15-d5748eaba0fa,26,L5,...,1,39151.0,18292.0,39353.0,18490.0,39252.0,18391.0,"[39252.0, 18391.0]",202.0,198.0


In [18]:
# Factor applied to pixel distances to express them in microns.
microns_per_pixel = 0.25

result_columns = ["scanner", "image_name", "image_type", "annotation_type",
                  "type_name", "patient_id", "distance", "grid_x", "grid_y",
                  "x_start", "x_end", "y_start", "y_end", "method", "runtime"]

# Rows are collected in a plain list and converted to a DataFrame once at the end.
result_rows = []
# (image_name, method, type_name) triples for which a method has no prediction.
missing_predictions = []

for image_name in predicted_annotations["image_name"].unique():

    gt_annotations_image = gt_annotations[gt_annotations["image_name"] == image_name]
    predicted_annotations_image = predicted_annotations[predicted_annotations["image_name"] == image_name]

    # Nothing to compare against; also avoids a thumbnail lookup for images
    # that have no ground truth.
    if gt_annotations_image.empty:
        continue

    # Loop-invariant work hoisted out of the method/landmark loops: thumbnails
    # are keyed with the "X40" spelling of the magnification tag.
    thumbnail = slide_list[image_name.replace("40X", "X40")]

    # First ground-truth row per landmark type (insertion order is preserved).
    gt_by_type = {
        type_name: gt_annotations_image[gt_annotations_image["type_name"] == type_name].iloc[0]
        for type_name in gt_annotations_image["type_name"].unique()
    }

    for method in predicted_annotations_image["method"].unique():

        method_annos = predicted_annotations_image[predicted_annotations_image["method"] == method]

        for type_name, gt_anno in gt_by_type.items():

            candidates = method_annos[method_annos["type_name"] == type_name]
            if candidates.empty:
                # Previously this raised a bare IndexError from .iloc[0];
                # record the gap and keep evaluating the remaining landmarks.
                missing_predictions.append((image_name, method, type_name))
                continue
            predicted_anno = candidates.iloc[0]

            # Euclidean distance between the two box centers, in microns.
            distance = np.linalg.norm(gt_anno.center - predicted_anno.center) * microns_per_pixel

            # Rescale both centers from full-resolution to thumbnail coordinates.
            x_start = gt_anno.center_x / gt_anno.image_width * thumbnail.width
            x_end = predicted_anno.center_x / predicted_anno.image_width * thumbnail.width
            y_start = gt_anno.center_y / gt_anno.image_height * thumbnail.height
            y_end = predicted_anno.center_y / predicted_anno.image_height * thumbnail.height

            grid_x, grid_y = coordinates[gt_anno.type_name]

            result_rows.append([gt_anno.scanner, image_name, gt_anno.image_type, gt_anno.annotation_type,
                                gt_anno.type_name, gt_anno.patient_id, distance, grid_x, grid_y,
                                x_start, x_end, y_start, y_end, method, predicted_anno.runtime])

if missing_predictions:
    print(f"Skipped {len(missing_predictions)} landmark(s) without a prediction, "
          f"e.g. {missing_predictions[:5]}")

results = pd.DataFrame(result_rows, columns=result_columns)
results.head()

Unnamed: 0,scanner,image_name,image_type,annotation_type,type_name,patient_id,distance,grid_x,grid_y,x_start,x_end,y_start,y_end,method,runtime
0,HE,CRC-A1-1 HE.tif,MSSC,21,L0,1,365.037755,0,0,165.634059,169.840817,266.15574,286.190324,QTree_L0,5.484886
1,HE,CRC-A1-1 HE.tif,MSSC,22,L1,1,452.803627,1,0,149.255751,166.601613,621.59086,640.139324,QTree_L0,5.484886
2,HE,CRC-A1-1 HE.tif,MSSC,24,L3,1,467.782869,3,0,146.212863,164.203762,1478.998172,1459.902928,QTree_L0,5.484886
3,HE,CRC-A1-1 HE.tif,MSSC,25,L4,1,714.678687,4,0,188.042053,204.238069,1844.892214,1808.229906,QTree_L0,5.484886
4,HE,CRC-A1-1 HE.tif,MSSC,26,L5,1,167.565099,0,1,559.779177,550.412131,257.070778,257.841878,QTree_L0,5.484886


In [19]:
# One summary row per (image_type, method) combination that has results:
# mean / std / min / max of the landmark distance plus the mean runtime.
# The "sum_dist" column name is kept as-is although it holds the mean.
comparison_rows = []

for image_type in results["image_type"].unique():
    for method in results["method"].unique():

        subset = results[(results["method"] == method) &
                         (results["image_type"] == image_type)]

        # Skip combinations that never occur in the results.
        if subset.empty:
            continue

        distances = subset["distance"]
        comparison_rows.append([
            method,
            image_type,
            distances.mean(),
            distances.std(),
            distances.min(),
            distances.max(),
            subset["runtime"].mean(),
        ])

method_comparision = pd.DataFrame(
    comparison_rows,
    columns=["method", "image_type", "sum_dist", "std", "min", "max", "runtime"],
)
method_comparision

Unnamed: 0,method,image_type,sum_dist,std,min,max,runtime
0,QTree_L0,MSSC,612.84288,616.421965,13.252358,2697.357051,3.741194
1,QTree_L1,MSSC,448.6058,574.620963,13.252358,2697.357051,43.449443
2,QTree_L2,MSSC,416.013066,539.796611,5.273756,2246.650076,307.209801
3,ReStain,MSSC,1401.271049,980.993955,144.401807,3719.357132,90.760244
4,GT_Registration,MSSC,211.139012,230.564973,12.611998,1650.802002,
5,QTree_L0,RSC,16.342287,19.634986,0.707107,89.990277,1.262502
6,QTree_L1,RSC,3.694585,3.750322,0.353553,21.931712,14.408189
7,QTree_L2,RSC,3.090563,4.601399,0.0,26.38655,113.826477
8,ReStain,RSC,11.048494,7.728093,1.057284,30.090914,108.577472
9,GT_Registration,RSC,6.471005,11.3784,0.25,73.251706,
