In [1]:
# Load (or reload) the autoreload extension and set it to mode 2 so that
# edits to imported project modules are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [6]:
from pathlib import Path
import pandas as pd
from tqdm import tqdm
import json
import numpy as np
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
import openslide

In [7]:
# Pickle protocol 5 is part of the standard library from Python 3.8 on.
# Older interpreters need the `pickle5` backport to read such files
# (install it with `pip install pickle5` if the import below fails).
import sys

# Compare the whole version tuple: testing only `.minor` would give the
# wrong answer for any interpreter whose major version is not 3.
if sys.version_info < (3, 8):
    import pickle5 as pickle

In [8]:
# Add the directory two levels above the working directory to the module
# search path before importing the project module below
# (presumably registration_tree lives there; confirm against the repo layout).
import sys
sys.path.append("../../")
from registration_tree import Rect, QuadTree

In [9]:
# Root folder of the slide collection; the glob patterns below look for
# <case>/IHC/*.tif (registration source) and <case>/HE/*.tif (target).
SLIDE_ROOT = Path(r"D:/Datasets/ColonCancer/")


def open_slide_dict(slide_paths, name_tokens):
    """Open every slide and index it by its cleaned file stem.

    Parameters
    ----------
    slide_paths : iterable of pathlib.Path
        Slide files to open with OpenSlide.
    name_tokens : sequence of str
        Substrings removed from each file stem, in the given order, to
        build the dictionary key.

    Returns
    -------
    dict
        ``{key: {"name": path, "slide": openslide.OpenSlide}}``. When two
        stems reduce to the same key, the later path replaces the earlier.
    """
    slide_dict = {}
    for slide_path in slide_paths:
        key = slide_path.stem
        for token in name_tokens:
            key = key.replace(token, "")
        slide_dict[key] = {"name": slide_path,
                           "slide": openslide.OpenSlide(str(slide_path))}
    return slide_dict


source_slide_paths = list(SLIDE_ROOT.glob("*/IHC/*.tif"))
target_slide_paths = list(SLIDE_ROOT.glob("*/HE/*.tif"))

# NOTE(review): the IHC (source) stems are stripped of " X40" / " HE" while
# the HE (target) stems are stripped of " X40 IHC". The tokens are kept
# exactly as they were; confirm they match the real file names, because
# only keys present in both dictionaries get registered later on.
source_slide_dict = open_slide_dict(source_slide_paths, (" X40", " HE"))
target_slide_dict = open_slide_dict(target_slide_paths, (" X40 IHC",))

# Only the last expression of a cell is rendered, so a bare
# `source_slide_dict.keys()` in the middle of the cell showed nothing.
target_slide_dict.keys()

dict_keys(['CRC-AI-11 F', 'CRC-AI-14 1G', 'CRC-AI-15', 'CRC-AI-8 E', 'CRC-AI-8 H'])

In [10]:
# Load the ground-truth annotations and derive per-box geometry columns.
annotations = pd.read_csv("GT_D240.csv")

annotations["image_name_stem"] = [Path(image_name).stem for image_name in annotations["image_name"]]

# The "vector" column stores each box as a dict-style string with single
# quotes, e.g. "{'x1': 4236, 'x2': 4434, 'y1': 8043, 'y2': 8241}".
# Swap the quotes to get valid JSON and parse every row once (instead of
# once per coordinate).
parsed_vectors = [json.loads(vector.replace("'", '"')) for vector in annotations["vector"]]
for corner in ("x1", "y1", "x2", "y2"):
    annotations[corner] = [parsed[corner] for parsed in parsed_vectors]

box_width = annotations["x2"] - annotations["x1"]
box_height = annotations["y2"] - annotations["y1"]

# Box centre = top-left corner plus half the extent.
annotations["center_x"] = annotations["x1"] + box_width / 2
annotations["center_y"] = annotations["y1"] + box_height / 2

# One (x, y) numpy array per row, used later for distance computations.
annotations["center"] = [np.array(center_xy)
                         for center_xy in zip(annotations["center_x"], annotations["center_y"])]

annotations["anno_width"] = box_width
annotations["anno_height"] = box_height

annotations.head()

Unnamed: 0,scanner,image_id,image_name,image_type,image_width,image_height,id,vector,unique_identifier,annotation_type,...,image_name_stem,x1,y1,x2,y2,center_x,center_y,center,anno_width,anno_height
0,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26106,"{'x1': 4236, 'x2': 4434, 'y1': 8043, 'y2': 8241}",f52b1e31-3df8-4041-9902-66bc6edef761,21,...,CRC-AI-15 40X,4236,8043,4434,8241,4335.0,8142.0,"[4335.0, 8142.0]",198,198
1,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26107,"{'x1': 6352, 'x2': 6552, 'y1': 23748, 'y2': 23...",b8ca1cea-38ef-4818-b050-d77ef0e44e21,22,...,CRC-AI-15 40X,6352,23748,6552,23948,6452.0,23848.0,"[6452.0, 23848.0]",200,200
2,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26108,"{'x1': 6158, 'x2': 6358, 'y1': 38586, 'y2': 38...",9d817eda-dd12-49b4-a942-9d7ddd77f305,23,...,CRC-AI-15 40X,6158,38586,6358,38786,6258.0,38686.0,"[6258.0, 38686.0]",200,200
3,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26109,"{'x1': 11860, 'x2': 12060, 'y1': 52464, 'y2': ...",04454db4-2e29-4e18-8f8e-271d8e154507,24,...,CRC-AI-15 40X,11860,52464,12060,52664,11960.0,52564.0,"[11960.0, 52564.0]",200,200
4,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26110,"{'x1': 6827, 'x2': 7025, 'y1': 68732, 'y2': 68...",81ee0679-7857-4fc4-a402-df65b0576831,25,...,CRC-AI-15 40X,6827,68732,7025,68934,6926.0,68833.0,"[6926.0, 68833.0]",198,202


In [None]:
%%time

for name, source in source_slide_dict.items():
    
    if name not in target_slide_dict:
        continue
        
    target = target_slide_dict[name]
        
    source_slide = source["name"]
    target_slide = target["name"]
    
    patient_id = source_slide.split("-")[2].replace("40X", "X40").split(" X40")[0].replace(" HE", "").replace(".tif", "")
        
    tissue_detector = TissueDetector("LAB_Threshold", threshold=90)
    matcher_parameters = MatcherParameters() 
    
    try:
        matcher = WSI_Matcher(tissue_detector, matcher_parameters)
        offset = matcher.match(str(source_slide), str(target_slide))
        
        source_name, target_name = source_slide.stem, target_slide.stem
        
        source_annos = annotations[(annotations["image_name_stem"] == source_name) & 
                                   (annotations["image_type"] == image_type)]
        
            target_annos = annotations[(annotations["image_name_stem"] == target_name) & 
                                   (annotations["image_type"] == image_type)]
        
        print(f"S: {source_slide.stem}  T: {target_slide.stem} Results: {offset}")
    except:
         print(f"S: {source_slide.stem}  T: {target_slide.stem} Results: {None, None}")
    
    

In [23]:
# Apply every stored quad-tree registration to the source annotations and
# collect (a) one quality record per registration and (b) one transformed
# box per source annotation that has a counterpart on the target slide.
quality_rows, result_rows = [], []

for path in Path("../../ColonCancer/").glob("*/*/*-To-*.pickle"):

    # File stems look like "<source>-To-<target>".
    source_name, target_name = path.stem.split("-To-")

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context manager closes the handle even if loading fails.
    with open(path, "rb") as pickle_file:
        qtree = pickle.load(pickle_file)

    patient_id = source_name.split("-")[2].replace("40X", "X40").split(" X40")[0].replace(" HE", "").replace(".tif", "")
    image_type = "MSSC" if "40" not in source_name else "RSC"

    # Quality is recorded for every registration, including those whose
    # slides have no annotations (skipped further down).
    quality_rows.append([patient_id, source_name, target_name, int(qtree.mean_reg_error), qtree.thumbnail_size[0],
                         image_type, qtree.target_depth, qtree.run_time])

    source_annos = annotations[(annotations["image_name_stem"] == source_name) &
                               (annotations["image_type"] == image_type)]

    if len(source_annos) == 0:
        continue

    target_annos = annotations[(annotations["image_name_stem"] == target_name) &
                               (annotations["image_type"] == image_type)]

    if len(target_annos) == 0:
        continue

    # `_` instead of `id`: the loop variable lives on in the notebook's
    # global namespace and would otherwise clobber the builtin `id`.
    for _, source_anno in source_annos.iterrows():

        matching_targets = target_annos[target_annos["type_name"] == source_anno.type_name]
        if matching_targets.empty:
            # No annotation of this type on the target slide, so there is
            # nothing to take the metadata or the reference centre from.
            continue
        target_anno = matching_targets.iloc[0]

        target_scanner = target_anno.scanner
        image_id = target_anno.image_id
        image_name = target_anno.image_name
        image_width, image_height = target_anno.image_width, target_anno.image_height

        # Box layout handed to the tree: centre x, centre y, width, height.
        box = [source_anno.center_x, source_anno.center_y, source_anno.anno_width, source_anno.anno_height]

        # Default transform; used unless one of the levels below is closer.
        trans_box = qtree.transform_boxes(np.array([box]))[0]

        # Try each level 0..target_depth and keep the box whose centre is
        # closest to the target annotation's centre.
        # NOTE(review): 99999 acts as the "no candidate yet" sentinel; if
        # every level is at least that far away the default transform above
        # is kept. Confirm this cut-off is intended.
        distance = 99999
        for level in range(0, qtree.target_depth + 1):

            temp_trans_box = qtree.transform_boxes(np.array([box]), level)[0]

            temp_distance = np.linalg.norm(target_anno.center - temp_trans_box[:2])

            if temp_distance < distance:
                trans_box = temp_trans_box
                distance = temp_distance

        # Convert centre/size back to integer corner coordinates.
        new_x1 = int(trans_box[0] - trans_box[2] // 2)
        new_y1 = int(trans_box[1] - trans_box[3] // 2)
        new_x2 = int(trans_box[0] + trans_box[2] // 2)
        new_y2 = int(trans_box[1] + trans_box[3] // 2)

        vector = {'x1': new_x1, 'x2': new_x2, 'y1': new_y1, 'y2': new_y2}

        result_rows.append([target_scanner, image_id, image_name, image_type, image_width, image_height,
                            vector, source_anno.unique_identifier, source_anno.annotation_type,
                            source_anno.type_name, f"QTree_L{qtree.target_depth}", qtree.run_time,
                            qtree.thumbnail_size[0]])

quality = pd.DataFrame(quality_rows, columns=["patient_id", "source", "target", "mean_reg_error", "thumbnail_size",
                                              "image_type", "target_depth", "run_time"])

results = pd.DataFrame(result_rows, columns=["scanner", "image_id", "image_name", "image_type", "image_width",
                                             "image_height", "vector", "unique_identifier", "annotation_type",
                                             "type_name", "method", "runtime", "thumbnail_size"])
results.head()


Unnamed: 0,scanner,image_id,image_name,image_type,image_width,image_height,vector,unique_identifier,annotation_type,type_name,method,runtime,thumbnail_size
0,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 11412, 'x2': 11630, 'y1': 19569, 'y2': ...",d68e7b9a-d3ae-43ea-9c7c-4dcac427c588,21,L0,QTree_L0,2.143089,1024
1,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 10816, 'x2': 11034, 'y1': 45437, 'y2': ...",0ca22054-a283-4010-bde9-ecb8482c1bd3,22,L1,QTree_L0,2.143089,1024
2,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 9830, 'x2': 10048, 'y1': 105362, 'y2': ...",2a73c263-0dae-4151-82e0-c661ad26930c,24,L3,QTree_L0,2.143089,1024
3,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 12558, 'x2': 12776, 'y1': 130935, 'y2':...",0b4c38dd-b8a5-49bb-b0be-735b43e47cac,25,L4,QTree_L0,2.143089,1024
4,HE,335,CRC-A1-1 HE.tif,MSSC,140203,146077,"{'x1': 40622, 'x2': 40840, 'y1': 18528, 'y2': ...",325de832-a522-4d95-ab15-d5748eaba0fa,26,L5,QTree_L0,2.143089,1024


In [24]:
# Write one CSV per thumbnail size, named "QuadTree_<size>.csv".
for thumbnail_size in results["thumbnail_size"].unique():
    size_mask = results["thumbnail_size"].eq(thumbnail_size)
    data = results.loc[size_mask]
    data.to_csv(f"QuadTree_{thumbnail_size}.csv", index=False)

In [None]:
# Spot check: transformed "L22" boxes on one slide at thumbnail size 2048.
results.loc[
    results["type_name"].eq("L22")
    & results["image_name"].eq("CRC-A1-10 HE.tif")
    & results["thumbnail_size"].eq(2048)
]

In [None]:
# Spot check: ground-truth "L22" annotations on the same slide.
annotations.loc[
    annotations["type_name"].eq("L22")
    & annotations["image_name"].eq("CRC-A1-10 HE.tif")
]