In [24]:
# Reload edited project modules automatically before each cell is executed.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [25]:
from pathlib import Path
import pandas as pd
from tqdm import tqdm
import json
import numpy as np
import pickle

import matplotlib.pyplot as plt
import seaborn as sns
from openslide import OpenSlide

In [26]:
from probreg import cpd
from probreg import callbacks
from probreg import transformation as tf

In [27]:
# Use a large default font size for every matplotlib figure in this notebook.
plt.rcParams.update({'font.size': 25})

# NOTE(review): patch_size is not referenced in the cells visible here - confirm it is still needed.
patch_size = 1024

In [28]:
def add_help_fields(frame):
    """Add derived helper columns to an annotation frame and return it.

    The frame is modified in place; the returned object is the same frame
    that was passed in.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must provide an ``image_name`` column (file names with at least two
        ``-`` separators) and a ``vector`` column holding one bounding box per
        row as a dict-like string with the keys ``x1``, ``x2``, ``y1``, ``y2``.

    Returns
    -------
    pandas.DataFrame
        ``frame`` with these columns added: ``image_name_stem``,
        ``patient_id``, ``x1``, ``y1``, ``x2``, ``y2``, ``center_x``,
        ``center_y``, ``center`` (``[center_x, center_y]`` list),
        ``anno_width`` and ``anno_height``.

    Raises
    ------
    IndexError
        If an image name contains fewer than two ``-`` separators.
    KeyError
        If a vector lacks one of the four coordinate keys.
    """
    image_names = frame["image_name"]

    frame["image_name_stem"] = [Path(image_name).stem for image_name in image_names]

    # The patient id is the third dash-separated token of the file name with the
    # " X40" / " 40X" suffix, the " HE" marker and the ".tif" extension removed.
    frame["patient_id"] = [
        name.split("-")[2].replace("40X", "X40").split(" X40")[0].replace(" HE", "").replace(".tif", "")
        for name in image_names
    ]

    # Parse every vector string exactly once (it is stored with single quotes,
    # so the quotes are swapped to make it valid JSON) and reuse the parsed
    # dict for all four coordinate columns.
    boxes = [json.loads(vector.replace("\'", "\"")) for vector in frame["vector"]]
    for key in ("x1", "y1", "x2", "y2"):
        frame[key] = [box[key] for box in boxes]

    frame["center_x"] = [x1 + ((x2 - x1) / 2) for x1, x2 in zip(frame["x1"], frame["x2"])]
    frame["center_y"] = [y1 + ((y2 - y1) / 2) for y1, y2 in zip(frame["y1"], frame["y2"])]

    frame["center"] = [[center_x, center_y] for center_x, center_y in zip(frame["center_x"], frame["center_y"])]

    frame["anno_width"] = [x2 - x1 for x1, x2 in zip(frame["x1"], frame["x2"])]
    frame["anno_height"] = [y2 - y1 for y1, y2 in zip(frame["y1"], frame["y2"])]

    return frame

In [29]:
# Load the ground-truth annotations and attach the derived helper columns
# (patient id, box corners, box center and box size).
gt_annotations = add_help_fields(pd.read_csv("GT_D240.csv"))
gt_annotations.head()

Unnamed: 0,scanner,image_id,image_name,image_type,image_width,image_height,id,vector,unique_identifier,annotation_type,...,patient_id,x1,y1,x2,y2,center_x,center_y,center,anno_width,anno_height
0,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26106,"{'x1': 4236, 'x2': 4434, 'y1': 8043, 'y2': 8241}",f52b1e31-3df8-4041-9902-66bc6edef761,21,...,15,4236,8043,4434,8241,4335.0,8142.0,"[4335.0, 8142.0]",198,198
1,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26107,"{'x1': 6352, 'x2': 6552, 'y1': 23748, 'y2': 23...",b8ca1cea-38ef-4818-b050-d77ef0e44e21,22,...,15,6352,23748,6552,23948,6452.0,23848.0,"[6452.0, 23848.0]",200,200
2,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26108,"{'x1': 6158, 'x2': 6358, 'y1': 38586, 'y2': 38...",9d817eda-dd12-49b4-a942-9d7ddd77f305,23,...,15,6158,38586,6358,38786,6258.0,38686.0,"[6258.0, 38686.0]",200,200
3,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26109,"{'x1': 11860, 'x2': 12060, 'y1': 52464, 'y2': ...",04454db4-2e29-4e18-8f8e-271d8e154507,24,...,15,11860,52464,12060,52664,11960.0,52564.0,"[11960.0, 52564.0]",200,200
4,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26110,"{'x1': 6827, 'x2': 7025, 'y1': 68732, 'y2': 68...",81ee0679-7857-4fc4-a402-df65b0576831,25,...,15,6827,68732,7025,68934,6926.0,68833.0,"[6926.0, 68833.0]",198,202


In [30]:
# Distinct patient ids as derived from the image file names.
gt_annotations["patient_id"].unique()

array(['15', '8 E', '8 H', '11 F', '14 1G', '2', '5', '10', '13', '1'],
      dtype=object)

In [31]:
# For every patient, fit an affine CPD registration from the IHC annotation
# centers to the centers on each target scanner, record the fit quality, and
# project every IHC box into the target image's coordinates.
result_rows, quality_rows = [], []

for patient_id in tqdm(gt_annotations["patient_id"].unique()):

    patient_annos = gt_annotations[gt_annotations["patient_id"] == patient_id]
    ihc_annos = patient_annos[patient_annos["scanner"] == "IHC"]

    for scanner in ['HE']:

        scanner_annos = patient_annos[patient_annos["scanner"] == scanner]

        # Keep only annotation types present on BOTH slides. Intersecting with
        # all patient annotations (which include the source rows themselves)
        # would leave the source set unfiltered.
        shared_types = list(set(ihc_annos["type_name"]).intersection(scanner_annos["type_name"]))
        source_annos = ihc_annos[ihc_annos["type_name"].isin(shared_types)]
        target_annos = scanner_annos[scanner_annos["type_name"].isin(shared_types)]

        if source_annos.empty or target_annos.empty:
            # Nothing to register; report it instead of failing on iloc[0].
            tqdm.write(f"Skipping patient {patient_id!r} / {scanner}: no shared annotations")
            continue

        first_source_anno = source_annos.iloc[0]
        target_anno = target_annos.iloc[0]

        source = np.array(source_annos["center"].tolist())
        target = np.array(target_annos["center"].tolist())

        tf_param, sigma2, q = cpd.registration_cpd(source, target, 'affine')

        # NOTE(review): this row-wise difference assumes source and target hold
        # the same number of points in corresponding order - confirm that
        # type_name is unique per slide and both slides list it in the same order.
        mean_reg_error = np.linalg.norm(tf_param.transform(source) - target, axis=1).mean()

        # The diagonal of the affine matrix is used as the per-axis scale factor.
        # NOTE(review): this ignores rotation/shear terms - confirm that is acceptable.
        mpp_x_scale = tf_param.b[0][0]
        mpp_y_scale = tf_param.b[1][1]

        # Image-level metadata of the target slide, copied onto every output row.
        target_scanner = target_anno.scanner
        image_id = target_anno.image_id
        image_name = target_anno.image_name
        image_width, image_height = target_anno.image_width, target_anno.image_height
        image_type = target_anno.image_type

        quality_rows.append([patient_id, first_source_anno.image_name, target_anno.image_name,
                             int(q), int(sigma2), image_type, tf_param.b, tf_param.t, mean_reg_error])

        # A dedicated loop variable keeps the per-patient source record above
        # intact and avoids shadowing the builtin `id`.
        for _, anno in source_annos.iterrows():

            # Transform the box center into target coordinates.
            center_x, center_y = tf_param.transform([[anno.center_x, anno.center_y]])[0]

            # Scale width and height, then rebuild the corners around the new center.
            half_width = (anno.anno_width * mpp_x_scale) // 2
            half_height = (anno.anno_height * mpp_y_scale) // 2

            vector = {'x1': int(center_x - half_width), 'x2': int(center_x + half_width),
                      'y1': int(center_y - half_height), 'y2': int(center_y + half_height)}

            result_rows.append([target_scanner, image_id, image_name, image_type, image_width, image_height,
                                vector, anno.unique_identifier, anno.annotation_type,
                                anno.type_name, "GT_Registration"])

quality = pd.DataFrame(quality_rows, columns=["patient_id", "source", "target", "q", "sigma2", "image_type", "b", "t", "mean_reg_error"])

results = pd.DataFrame(result_rows, columns=["scanner", "image_id", "image_name", "image_type", "image_width",
                                             "image_height", "vector", "unique_identifier", "annotation_type", "type_name", "method"])
results.head()

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 55.56it/s]


Unnamed: 0,scanner,image_id,image_name,image_type,image_width,image_height,vector,unique_identifier,annotation_type,type_name,method
0,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,"{'x1': 4230, 'x2': 4430, 'y1': 7882, 'y2': 8082}",f52b1e31-3df8-4041-9902-66bc6edef761,21,L0,GT_Registration
1,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,"{'x1': 6356, 'x2': 6556, 'y1': 23681, 'y2': 23...",b8ca1cea-38ef-4818-b050-d77ef0e44e21,22,L1,GT_Registration
2,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,"{'x1': 6165, 'x2': 6365, 'y1': 38592, 'y2': 38...",9d817eda-dd12-49b4-a942-9d7ddd77f305,23,L2,GT_Registration
3,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,"{'x1': 11865, 'x2': 12065, 'y1': 52519, 'y2': ...",04454db4-2e29-4e18-8f8e-271d8e154507,24,L3,GT_Registration
4,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,"{'x1': 6816, 'x2': 7016, 'y1': 68751, 'y2': 68...",81ee0679-7857-4fc4-a402-df65b0576831,25,L4,GT_Registration


In [32]:
# Persist the registered boxes; the DataFrame index is not written to the file.
results.to_csv("GT_Registration_D240.csv", index=False)

In [23]:
# Per-patient mean distance between the transformed source centers and the
# target centers (in annotation coordinate units).
quality[["patient_id", "target", "mean_reg_error"]]

Unnamed: 0,patient_id,target,mean_reg_error
0,15,CRC-AI-15 40X.tif,458.382436
1,8 E,CRC-AI-8 E X40.tif,138.745466
2,8 H,CRC-AI-8 H X40.tif,12.520152
3,11 F,CRC-AI-11 F X40.tif,43.536833
4,14 1G,CRC-AI-14 1G X40.tif,13.469475
5,2,CRC-A1-2 HE.tif,424.21574
6,5,CRC-A1-5 HE.tif,1569.785094
7,10,CRC-A1-10 HE.tif,746.041115
8,13,CRC-A1-13 HE.tif,186.020582
9,1,CRC-A1-1 HE.tif,1123.539822
