In [1]:
# Notebook setup magics. Comments are kept on their own lines on purpose:
# a line magic receives the remainder of its line as its argument.
# (Re)load the autoreload extension so edited modules are picked up live.
%reload_ext autoreload
# Mode 2 of the autoreload extension.
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from pathlib import Path
import pandas as pd
from tqdm import tqdm
import json
import numpy as np
import pickle

import matplotlib.pyplot as plt
import seaborn as sns
from openslide import OpenSlide

In [14]:
# Upper bound (in pixels, per side) requested for the slide thumbnails; the
# same value selects the QuadTree result file that is loaded further down.
patch_size = 2048

# Large default font so figure labels stay readable.
plt.rcParams['font.size'] = 25

In [9]:
# Build a {file name: thumbnail} lookup for every .tif slide in the dataset.
# NOTE(review): the dataset root is an absolute, machine-specific path --
# consider moving it to a configurable constant.
slide_root = Path(r"D:\Datasets\ColonCancer")

slide_list = {}
for path in tqdm(slide_root.glob("*/*/*.*")):
    if path.suffix not in [".tif"]:
        continue
    # Open each slide in a context manager so its file handle is released as
    # soon as the thumbnail has been rendered; the thumbnail itself is an
    # independent PIL image and stays valid after the slide is closed.
    with OpenSlide(str(path)) as slide:
        slide_list[path.name] = slide.get_thumbnail(size=(patch_size, patch_size))

slide_list

20it [00:21,  1.08s/it]


{'CRC-A1-1 HE.tif': <PIL.Image.Image image mode=RGB size=983x1024 at 0x193A99E40F0>,
 'CRC-A1-10 HE.tif': <PIL.Image.Image image mode=RGB size=1024x935 at 0x193A99E4438>,
 'CRC-A1-13 HE.tif': <PIL.Image.Image image mode=RGB size=1024x964 at 0x193A99E4518>,
 'CRC-A1-2 HE.tif': <PIL.Image.Image image mode=RGB size=838x1024 at 0x193A99E4278>,
 'CRC-A1-5 HE.tif': <PIL.Image.Image image mode=RGB size=741x1024 at 0x193A99E42E8>,
 'CRC-A1-1.tif': <PIL.Image.Image image mode=RGB size=1024x1003 at 0x193A99E40B8>,
 'CRC-A1-10.tif': <PIL.Image.Image image mode=RGB size=1024x983 at 0x193A99E4A58>,
 'CRC-A1-13.tif': <PIL.Image.Image image mode=RGB size=814x1024 at 0x193AB35F198>,
 'CRC-A1-2.tif': <PIL.Image.Image image mode=RGB size=879x1024 at 0x193AB35F0F0>,
 'CRC-A1-5.tif': <PIL.Image.Image image mode=RGB size=794x1024 at 0x193AB35F080>,
 'CRC-AI-11 F X40.tif': <PIL.Image.Image image mode=RGB size=816x1024 at 0x193AB35F128>,
 'CRC-AI-14 1G X40.tif': <PIL.Image.Image image mode=RGB size=878x1024 

In [5]:
# Landmark label -> (grid_x, grid_y) position on a 5 x 5 grid.
# Labels run L0..L24 in row-major order: grid_x advances first and grid_y
# increases after every five labels.
coordinates = {
    f"L{row * 5 + column}": (column, row)
    for row in range(5)
    for column in range(5)
}

In [6]:
def add_help_fields(frame):
    """Add derived name, patient and bounding-box columns to an annotation frame.

    The frame is modified in place and also returned, so the call can be
    chained directly onto ``pd.read_csv`` / ``pd.concat``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain an ``image_name`` column (slide file names) and a
        ``vector`` column. Each vector is either a dict with the keys
        ``x1``, ``y1``, ``x2``, ``y2`` or the string form of such a dict
        (single or double quoted).

    Returns
    -------
    pandas.DataFrame
        The same ``frame`` object with these columns added, in this order:
        ``image_name_stem``, ``patient_id``, ``x1``, ``y1``, ``x2``, ``y2``,
        ``center_x``, ``center_y``, ``center``, ``anno_width``, ``anno_height``.
    """

    def _parse_vector(vector):
        # Already-decoded vectors are used as they are; strings are stored
        # with single quotes, which have to be swapped before JSON decoding.
        if isinstance(vector, dict):
            return vector
        return json.loads(vector.replace("\'", "\""))

    # Decode every vector exactly once instead of once per derived column.
    boxes = [_parse_vector(vector) for vector in frame["vector"]]
    x1 = [box['x1'] for box in boxes]
    y1 = [box['y1'] for box in boxes]
    x2 = [box['x2'] for box in boxes]
    y2 = [box['y2'] for box in boxes]

    center_x = [left + ((right - left) / 2) for left, right in zip(x1, x2)]
    center_y = [top + ((bottom - top) / 2) for top, bottom in zip(y1, y2)]

    # Plain lists are assigned on purpose: they are written positionally, so
    # the result is correct even if the frame carries duplicate index labels
    # (e.g. after pd.concat).
    frame["image_name_stem"] = [Path(image_name).stem for image_name in frame["image_name"]]

    # Third dash-separated token of the file name with the magnification
    # marker ("40X"/"X40"), the " HE" stain marker and the extension removed.
    frame["patient_id"] = [
        name.split("-")[2]
            .replace("40X", "X40")
            .split(" X40")[0]
            .replace(" HE", "")
            .replace(".tif", "")
        for name in frame["image_name"]
    ]

    frame["x1"] = x1
    frame["y1"] = y1

    frame["x2"] = x2
    frame["y2"] = y2

    frame["center_x"] = center_x
    frame["center_y"] = center_y

    # Centre as a 2-vector so distances can be taken with np.linalg.norm.
    frame["center"] = [np.array((cx, cy)) for cx, cy in zip(center_x, center_y)]

    frame["anno_width"] = [right - left for left, right in zip(x1, x2)]
    frame["anno_height"] = [bottom - top for top, bottom in zip(y1, y2)]

    return frame

In [15]:
# Ground-truth landmark annotations.
gt_annotations = add_help_fields(pd.read_csv("GT_D240.csv"))

# Predictions: the QuadTree results for the configured patch size plus the
# registration results, stacked into one frame.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# both CSVs contribute their own labels and the index contains duplicates.
predicted_annotations = add_help_fields(
    pd.concat(
        [pd.read_csv(f"QuadTree_{patch_size}.csv"), pd.read_csv("GT_Registration_D240.csv")],
        ignore_index=True,
    )
)
predicted_annotations.head()

Unnamed: 0,scanner,image_id,image_name,image_type,image_width,image_height,vector,unique_identifier,annotation_type,type_name,...,patient_id,x1,y1,x2,y2,center_x,center_y,center,anno_width,anno_height
0,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 12011, 'x2': 12213, 'y1': 20314, 'y2': ...",d68e7b9a-d3ae-43ea-9c7c-4dcac427c588,21,L0,...,1,12011,20314,12213,20512,12112.0,20413.0,"[12112.0, 20413.0]",202,198
1,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 11780, 'x2': 11982, 'y1': 45560, 'y2': ...",0ca22054-a283-4010-bde9-ecb8482c1bd3,22,L1,...,1,11780,45560,11982,45758,11881.0,45659.0,"[11881.0, 45659.0]",202,198
2,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 11609, 'x2': 11811, 'y1': 104031, 'y2':...",2a73c263-0dae-4151-82e0-c661ad26930c,24,L3,...,1,11609,104031,11811,104229,11710.0,104130.0,"[11710.0, 104130.0]",202,198
3,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 14464, 'x2': 14666, 'y1': 128876, 'y2':...",0b4c38dd-b8a5-49bb-b0be-735b43e47cac,25,L4,...,1,14464,128876,14666,129074,14565.0,128975.0,"[14565.0, 128975.0]",202,198
4,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 39151, 'x2': 39353, 'y1': 18292, 'y2': ...",325de832-a522-4d95-ab15-d5748eaba0fa,26,L5,...,1,39151,18292,39353,18490,39252.0,18391.0,"[39252.0, 18391.0]",202,198


In [20]:
# Compare every predicted landmark with its ground-truth counterpart, per
# image and per method, and collect one row per (image, method, landmark).

# Factor applied to pixel distances so that they are reported in microns.
MICRONS_PER_PIXEL = 0.25

result_rows = []
# (image_name, method, type_name) triples that have a ground-truth landmark
# but no prediction; they are skipped instead of aborting the whole cell.
missing_predictions = []

for image_name in predicted_annotations["image_name"].unique():

    gt_annotations_image = gt_annotations[gt_annotations["image_name"] == image_name]
    if gt_annotations_image.empty:
        # No ground truth for this image, so there is nothing to compare.
        continue
    predicted_annotations_image = predicted_annotations[predicted_annotations["image_name"] == image_name]

    # The thumbnail lookup uses the "X40" spelling of the file name. It only
    # depends on the image, so it is resolved once per image.
    thumbnail = slide_list[image_name.replace("40X", "X40")]

    for method in predicted_annotations_image["method"].unique():

        method_annos = predicted_annotations_image[predicted_annotations_image["method"] == method]

        for type_name in gt_annotations_image["type_name"].unique():

            gt_anno = gt_annotations_image[gt_annotations_image["type_name"] == type_name].iloc[0]

            candidates = method_annos[method_annos["type_name"] == type_name]
            if candidates.empty:
                missing_predictions.append((image_name, method, type_name))
                continue
            predicted_anno = candidates.iloc[0]

            # Euclidean distance between the two box centres, pixels -> microns.
            distance = np.linalg.norm(gt_anno.center - predicted_anno.center) * MICRONS_PER_PIXEL

            # Project both centres from slide coordinates onto the thumbnail.
            x_start = gt_anno.center_x / gt_anno.image_width * thumbnail.width
            x_end = predicted_anno.center_x / predicted_anno.image_width * thumbnail.width
            y_start = gt_anno.center_y / gt_anno.image_height * thumbnail.height
            y_end = predicted_anno.center_y / predicted_anno.image_height * thumbnail.height

            grid_x, grid_y = coordinates[gt_anno.type_name]

            result_rows.append([
                gt_anno.scanner, image_name, gt_anno.image_type, gt_anno.annotation_type,
                gt_anno.type_name, gt_anno.patient_id, distance, grid_x, grid_y,
                x_start, x_end, y_start, y_end, method, predicted_anno.runtime,
            ])

results = pd.DataFrame(result_rows, columns=["scanner", "image_name", "image_type", "annotation_type",
                                             "type_name", "patient_id", "distance", "grid_x", "grid_y",
                                             "x_start", "x_end", "y_start", "y_end", "method", "runtime"])
results.head()

Unnamed: 0,scanner,image_name,image_type,annotation_type,type_name,patient_id,distance,grid_x,grid_y,x_start,x_end,y_start,y_end,method,runtime
0,HE,CRC-A1-1 HE.tif,MSSC,21,L0,1,365.037755,0,0,82.81703,84.920408,133.07787,143.095162,QTree_L0,5.484886
1,HE,CRC-A1-1 HE.tif,MSSC,22,L1,1,452.803627,1,0,74.627875,83.300807,310.79543,320.069662,QTree_L0,5.484886
2,HE,CRC-A1-1 HE.tif,MSSC,24,L3,1,467.782869,3,0,73.106431,82.101881,739.499086,729.951464,QTree_L0,5.484886
3,HE,CRC-A1-1 HE.tif,MSSC,25,L4,1,714.678687,4,0,94.021027,102.119035,922.446107,904.114953,QTree_L0,5.484886
4,HE,CRC-A1-1 HE.tif,MSSC,26,L5,1,167.565099,0,1,279.889589,275.206065,128.535389,128.920939,QTree_L0,5.484886


In [21]:
# Per (image_type, method) summary of the landmark distances and runtimes.
# Rows are emitted image-type first, then method, each in order of first
# appearance in `results`; combinations without any rows are left out.
summary_rows = []

for image_type in results["image_type"].unique():
    of_image_type = results["image_type"] == image_type

    for method in results["method"].unique():
        subset = results[of_image_type & (results["method"] == method)]
        if subset.empty:
            continue

        distances = subset["distance"]
        summary_rows.append([
            method,
            image_type,
            distances.mean(),  # stored under "sum_dist", but it is the mean
            distances.std(),
            distances.min(),
            distances.max(),
            subset["runtime"].mean(),
        ])

method_comparision = pd.DataFrame(
    summary_rows,
    columns=["method", "image_type", "sum_dist", "std", "min", "max", "runtime"],
)
method_comparision

Unnamed: 0,method,image_type,sum_dist,std,min,max,runtime
0,QTree_L0,MSSC,612.84288,616.421965,13.252358,2697.357051,3.741194
1,QTree_L1,MSSC,448.6058,574.620963,13.252358,2697.357051,43.449443
2,QTree_L2,MSSC,416.013066,539.796611,5.273756,2246.650076,307.209801
3,GT_Registration,MSSC,211.139012,230.564973,12.611998,1650.802002,
4,QTree_L0,RSC,16.342287,19.634986,0.707107,89.990277,1.262502
5,QTree_L1,RSC,3.694585,3.750322,0.353553,21.931712,14.408189
6,QTree_L2,RSC,3.090563,4.601399,0.0,26.38655,113.826477
7,GT_Registration,RSC,6.471005,11.3784,0.25,73.251706,
