In [1]:
# (Re)load IPython's autoreload extension.
%reload_ext autoreload
# Mode 2: re-import all modules before each cell is executed, so edits to
# imported .py files take effect without restarting the kernel.
%autoreload 2
# Use the inline matplotlib backend so figures are rendered in the cell output.
%matplotlib inline

In [2]:
from pathlib import Path
import pandas as pd
from openslide import OpenSlide
from tqdm import tqdm
import json
import numpy as np
import math
import os

In [3]:
# Landmark type name -> 1-based landmark number:
# "L0" -> 1, "L1" -> 2, ..., "L24" -> 25 (25 landmark types in total).
coordinates = {f"L{index}": index + 1 for index in range(25)}

In [4]:
# Root folder of the whole-slide images. Set the COLON_CANCER_SLIDE_DIR
# environment variable to point somewhere else instead of editing this cell;
# without it the original local dataset location is used.
SLIDE_DIR = Path(os.environ.get("COLON_CANCER_SLIDE_DIR", r"D:\Datasets\ColonCancer"))

# File name -> full path for every ".tif" file located exactly two folder
# levels below SLIDE_DIR. The suffix match is case-sensitive, and files that
# share a name in different folders overwrite each other (the last one wins).
slide_list = {path.name: path for path in tqdm(SLIDE_DIR.glob("*/*/*.*"))
              if path.suffix in [".tif"]}

20it [00:00, 1661.34it/s]


In [5]:
def _patient_id_from_image_name(image_name):
    """Extract the patient id from a slide file name.

    The id is the start of the third "-"-separated part of the name, with the
    magnification tag (" X40" / " 40X"), a " HE" stain tag and the ".tif"
    extension removed, e.g. "CRC-AI-15 40X.tif" -> "15".

    Raises:
        IndexError: if the name contains fewer than two "-" characters.
    """
    tail = image_name.split("-")[2]
    # Normalise "40X" to "X40" so both spellings are cut off by the same split.
    tail = tail.replace("40X", "X40").split(" X40")[0]
    return tail.replace(" HE", "").replace(".tif", "")


def add_help_fields(frame):
    """Add derived helper columns to an annotation frame and return it.

    The frame is modified in place; the returned object is the same frame.

    Args:
        frame: DataFrame with an "image_name" column (slide file names) and a
            "vector" column holding the bounding box as a dict-like string
            such as "{'x1': 4236, 'x2': 4434, 'y1': 8043, 'y2': 8241}".

    Returns:
        The same frame with these columns added:
        image_name_stem, patient_id, x1, y1, x2, y2, center_x, center_y,
        center (numpy array ``[center_x, center_y]``), anno_width, anno_height.

    Raises:
        IndexError: if an image name has fewer than two "-" characters.
        json.JSONDecodeError: if a vector string is not valid JSON after the
            single quotes are replaced by double quotes.
        KeyError: if a vector lacks one of the keys x1, y1, x2, y2.
    """
    image_names = list(frame["image_name"])
    frame["image_name_stem"] = [Path(image_name).stem for image_name in image_names]
    frame["patient_id"] = [_patient_id_from_image_name(image_name) for image_name in image_names]

    # The vectors are stored with single quotes (Python dict repr); swap them
    # for double quotes so json can read them. Each row is parsed only once.
    boxes = [json.loads(vector.replace("'", '"')) for vector in frame["vector"]]
    x1 = [box["x1"] for box in boxes]
    y1 = [box["y1"] for box in boxes]
    x2 = [box["x2"] for box in boxes]
    y2 = [box["y2"] for box in boxes]

    frame["x1"] = x1
    frame["y1"] = y1

    frame["x2"] = x2
    frame["y2"] = y2

    # Box centre = lower corner plus half the extent along each axis.
    frame["center_x"] = [low + ((high - low) / 2) for low, high in zip(x1, x2)]
    frame["center_y"] = [low + ((high - low) / 2) for low, high in zip(y1, y2)]

    frame["center"] = [np.array((center_x, center_y))
                       for center_x, center_y in zip(frame["center_x"], frame["center_y"])]

    frame["anno_width"] = [high - low for low, high in zip(x1, x2)]
    frame["anno_height"] = [high - low for low, high in zip(y1, y2)]

    return frame

In [6]:
# Ground-truth annotation exports to load. Every file listed here is read, the
# frames are stacked, and the helper columns are derived on the combined frame.
gt_csv_files = ["../Validation/HE_IHC/GT_D240.csv"]
gt_frames = [pd.read_csv(csv_file) for csv_file in gt_csv_files]
gt_annotations = add_help_fields(pd.concat(gt_frames))
gt_annotations.head()

Unnamed: 0,scanner,image_id,image_name,image_type,image_width,image_height,id,vector,unique_identifier,annotation_type,...,patient_id,x1,y1,x2,y2,center_x,center_y,center,anno_width,anno_height
0,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26106,"{'x1': 4236, 'x2': 4434, 'y1': 8043, 'y2': 8241}",f52b1e31-3df8-4041-9902-66bc6edef761,21,...,15,4236,8043,4434,8241,4335.0,8142.0,"[4335.0, 8142.0]",198,198
1,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26107,"{'x1': 6352, 'x2': 6552, 'y1': 23748, 'y2': 23...",b8ca1cea-38ef-4818-b050-d77ef0e44e21,22,...,15,6352,23748,6552,23948,6452.0,23848.0,"[6452.0, 23848.0]",200,200
2,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26108,"{'x1': 6158, 'x2': 6358, 'y1': 38586, 'y2': 38...",9d817eda-dd12-49b4-a942-9d7ddd77f305,23,...,15,6158,38586,6358,38786,6258.0,38686.0,"[6258.0, 38686.0]",200,200
3,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26109,"{'x1': 11860, 'x2': 12060, 'y1': 52464, 'y2': ...",04454db4-2e29-4e18-8f8e-271d8e154507,24,...,15,11860,52464,12060,52664,11960.0,52564.0,"[11960.0, 52564.0]",200,200
4,HE,325,CRC-AI-15 40X.tif,RSC,86256,86432,26110,"{'x1': 6827, 'x2': 7025, 'y1': 68732, 'y2': 68...",81ee0679-7857-4fc4-a402-df65b0576831,25,...,15,6827,68732,7025,68934,6926.0,68833.0,"[6926.0, 68833.0]",198,202


In [7]:
# Write one landmark CSV per image: "Landmarks/<image stem>.csv" with the
# columns "", "X", "Y" (landmark number, centre x, centre y).
landmark_dir = Path("Landmarks")
# to_csv does not create missing folders, so make sure the target exists.
landmark_dir.mkdir(parents=True, exist_ok=True)

for image in gt_annotations["image_name"].unique():

    # Filter once per image instead of once per (image, landmark type) pair.
    image_annos = gt_annotations[gt_annotations["image_name"] == image]

    results = []

    for type_name, value in coordinates.items():

        image_type_annos = image_annos[image_annos["type_name"] == type_name]

        # Landmark types that were not annotated on this image are left out.
        if image_type_annos.empty:
            continue

        # Only the first annotation of a type is used; its centre is
        # truncated to whole pixels.
        x, y = image_type_annos.iloc[0].center

        results.append([value, int(x), int(y)])

    results = pd.DataFrame(results, columns=["", "X", "Y"])
    results.to_csv(landmark_dir / f"{Path(image).stem}.csv", index=False)

In [8]:
# File name -> path for every CSV directly inside the "Landmarks" folder.
landmark_list = {}
for landmark_path in tqdm(Path(r"Landmarks").glob("*.csv")):
    landmark_list[landmark_path.name] = landmark_path

60it [00:00, 29877.51it/s]


In [11]:
# Write one "pairs-imgs-lnds_<image_type>.csv" per image type. Each row pairs,
# for one patient, the HE slide (target) with the IHC slide (source) and lists
# the absolute landmark CSV paths plus the target image size and diagonal.
pair_columns = ["Target image", "Source image", "Target landmarks",
                "Source landmarks", "Image size [pixels]",
                "Image diagonal [pixels]"]
target_scanners = ['HE']
working_dir = Path(os.getcwd())

for image_type in tqdm(gt_annotations["image_type"].unique()):

    annos_of_type = gt_annotations[gt_annotations["image_type"] == image_type]
    pair_rows = []

    for patient_id in tqdm(annos_of_type["patient_id"].unique()):

        patient_annos = annos_of_type[annos_of_type["patient_id"] == patient_id]
        # The first IHC row of the patient identifies the source slide.
        source_anno = patient_annos[patient_annos["scanner"] == "IHC"].iloc[0]

        for scanner in target_scanners:

            # The first row of the target scanner identifies the target slide.
            target_anno = patient_annos[patient_annos["scanner"] == scanner].iloc[0]

            # slide_list is keyed with the "X40" spelling of the magnification tag.
            target_slide = slide_list[target_anno.image_name.replace("40X", "X40")]
            source_slide = slide_list[source_anno.image_name.replace("40X", "X40")]

            target_landmarks = working_dir / landmark_list[f"{Path(target_anno.image_name).stem}.csv"]
            source_landmarks = working_dir / landmark_list[f"{Path(source_anno.image_name).stem}.csv"]

            width = target_anno.image_width
            height = target_anno.image_height
            diagonal = math.sqrt((width * width) + (height * height))

            pair_rows.append([str(target_slide),
                              str(source_slide),
                              str(target_landmarks),
                              str(source_landmarks),
                              (width, height),
                              diagonal])

    results = pd.DataFrame(pair_rows, columns=pair_columns)
    results.to_csv(f"pairs-imgs-lnds_{image_type}.csv", index=False)
    

  0%|                                                                                            | 0/2 [00:00<?, ?it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 454.50it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 454.54it/s][A
100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 28.17it/s]
