In [1]:
# (Re)load the autoreload extension; mode 2 re-imports edited modules
# before each cell is executed.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [59]:
from tqdm import tqdm
import pandas as pd
import numpy as np
from pathlib import Path
import pickle
import openslide
import cv2

In [60]:
# Slide file names for the validation split.
# sorted(set) de-duplicates exactly like the previous list(set(...)), but the
# resulting order is stable: iterating a plain set of strings changes between
# kernel restarts (hash randomization), so the old lists were not reproducible.
slides_val = sorted({
    'BAL Promyk Spray 4.svs',
    'BAL AIA Blickfang Luft.svs',
})

# Slide file names for the training split (same de-duplication and ordering).
slides_train = sorted({
    'BAL 1 Spray 2.svs',
    'BAL Booker Spray 3.svs',
    'BAL Bubi Spray 1.svs',
    'BAL cent blue Luft 2.svs',
})

# Annotation classes that are kept. These strings are compared against the
# "class" column of the annotation table, so the spelling "Lymohozyten" must
# stay exactly as it is stored in the data.
labels = ['Mastzellen', "Makrophagen", "Neutrophile", "Eosinophile", "Lymohozyten"]

In [68]:
# Load the annotation table and keep only rows that (a) belong to one of the
# classes in `labels` and (b) are not flagged as deleted.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
annotations_path = Path("../Statistics/Asthma_Annotations.pkl")
annotations_all = pd.read_pickle(annotations_path)

is_known_class = annotations_all["class"].isin(labels)
is_not_deleted = annotations_all["deleted"] == False
annotations = annotations_all[is_known_class & is_not_deleted]

In [69]:
def _box_center(vector, low_key, high_key):
    """Return the integer midpoint between ``vector[low_key]`` and ``vector[high_key]``.

    The midpoint is computed in floating point and truncated with ``int()``,
    so a box with an odd extent loses its half pixel.
    """
    return int(vector[low_key] + (vector[high_key] - vector[low_key]) / 2)


# `annotations` is the result of boolean filtering, so writing columns into it
# with `annotations[...] = ...` is the chained-assignment pattern that raises
# SettingWithCopyWarning. `assign` builds a new frame instead, which also keeps
# this cell safe to re-run.
annotations = annotations.assign(
    center_x=[_box_center(vector, "x1", "x2") for vector in annotations["vector"]],
    center_y=[_box_center(vector, "y1", "y2") for vector in annotations["vector"]],
)
annotations.head()

Unnamed: 0,id,image_id,image_name,image_set,class,vector,unique_identifier,user_id,deleted,last_editor,center_x,center_y
0,172441,221,BAL 1 Spray 2.svs,14,Makrophagen,"{'x1': 13612, 'x2': 13666, 'y1': 12371, 'y2': ...",a268b46f-e13f-480e-b96d-c7900790ad5f,10,False,,13639,12398
1,172442,221,BAL 1 Spray 2.svs,14,Makrophagen,"{'x1': 13562, 'x2': 13618, 'y1': 12437, 'y2': ...",e8d67efd-0baf-4f78-ad73-b8f77422b826,10,False,,13590,12465
2,172443,221,BAL 1 Spray 2.svs,14,Makrophagen,"{'x1': 13482, 'x2': 13546, 'y1': 12458, 'y2': ...",55aa3d51-7f30-43a0-83ef-d9acc74d9a21,10,False,,13514,12490
3,172444,221,BAL 1 Spray 2.svs,14,Makrophagen,"{'x1': 13471, 'x2': 13533, 'y1': 12281, 'y2': ...",8f259e8f-119b-44de-8a90-2c53e7ee0e60,10,False,,13502,12312
4,172446,221,BAL 1 Spray 2.svs,14,Makrophagen,"{'x1': 13591, 'x2': 13673, 'y1': 12204, 'y2': ...",052b3f5a-f6f6-4b0d-a134-9f4797601dc4,10,False,,13632,12245


In [70]:
# Map slide file name -> path for every *.svs file below `slides_path` that is
# part of the training or validation split.
slides_path = Path("../Slides")

# Build the lookup set once; the previous version concatenated the two lists
# and scanned the result linearly for every file found by rglob.
wanted_slides = set(slides_train + slides_val)

files = {slide.name: slide for slide in slides_path.rglob("*.svs") if slide.name in wanted_slides}
files

{'BAL 1 Spray 2.svs': WindowsPath('../Slides/BAL 1 Spray 2.svs'),
 'BAL AIA Blickfang Luft.svs': WindowsPath('../Slides/BAL AIA Blickfang Luft.svs'),
 'BAL Booker Spray 3.svs': WindowsPath('../Slides/BAL Booker Spray 3.svs'),
 'BAL Bubi Spray 1.svs': WindowsPath('../Slides/BAL Bubi Spray 1.svs'),
 'BAL cent blue Luft 2.svs': WindowsPath('../Slides/BAL cent blue Luft 2.svs'),
 'BAL Promyk Spray 4.svs': WindowsPath('../Slides/BAL Promyk Spray 4.svs')}

In [71]:
# Screening information, keyed by slide file name. Each entry provides a
# "screening_tiles" mapping whose values carry the tile bounds
# (x_min / x_max / y_min / y_max) and a "Screened" flag.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
screening_path = Path("../Statistics/Screening.pickle")
screening_modes = pd.read_pickle(screening_path)

In [72]:
# Export one square patch per screened tile that contains at least one
# annotation, and collect the annotations of each patch in patch-local
# coordinates.
patch_size = 1024
# Patches are centred on the tile centre; derive the offset from `patch_size`
# so that changing the size keeps crop window and read size in sync.
half_patch = patch_size // 2

patches_dir = Path("../TrainPatches")
# cv2.imwrite does not create missing directories, it only returns False.
patches_dir.mkdir(parents=True, exist_ok=True)

# One [file_name, patch_name, vector, label] row per annotation.
records = []
for file_name in screening_modes:

    path = files[file_name]
    image_annotations = annotations[annotations["image_name"] == file_name]
    screening = screening_modes[file_name]

    tiles = [tile for tile in screening["screening_tiles"].values() if tile["Screened"] == True]

    # The context manager closes the slide handle once all tiles of this slide
    # are processed (previously the handles were never closed).
    with openslide.open_slide(str(path)) as slide:
        for tile in tqdm(tiles, desc=file_name):
            tile_center_x = int(tile["x_min"] + (tile["x_max"] - tile["x_min"]) / 2)
            tile_center_y = int(tile["y_min"] + (tile["y_max"] - tile["y_min"]) / 2)

            x_min = tile_center_x - half_patch
            y_min = tile_center_y - half_patch

            x_max = tile_center_x + half_patch
            y_max = tile_center_y + half_patch

            # An annotation belongs to the patch when its centre lies strictly
            # inside the patch window.
            inside_x = (image_annotations["center_x"] > x_min) & (image_annotations["center_x"] < x_max)
            inside_y = (image_annotations["center_y"] > y_min) & (image_annotations["center_y"] < y_max)
            tile_annotations = image_annotations[inside_x & inside_y]

            if len(tile_annotations) == 0:
                continue

            patch_name = f"{Path(file_name).stem}_{x_min}_{y_min}.png"

            # read_region yields an RGBA image; drop alpha and keep RGB.
            patch = np.array(slide.read_region(location=(x_min, y_min), level=0, size=(patch_size, patch_size)))[:, :, :3]

            # OpenCV writes images in BGR channel order.
            im_bgr = cv2.cvtColor(patch, cv2.COLOR_RGB2BGR)
            patch_path = patches_dir / patch_name
            if not cv2.imwrite(str(patch_path), im_bgr):
                # Fail loudly instead of recording annotations for a patch
                # that does not exist on disk.
                raise IOError(f"Could not write patch image: {patch_path}")

            # Shift boxes into patch-local coordinates. Selection is by box
            # centre, so a box may reach past the patch border (values < 0 or
            # > patch_size); boxes are not clipped here.
            for label, vector in zip(tile_annotations["class"], tile_annotations["vector"]):
                new_vector = {
                    "x1": vector["x1"] - x_min,
                    "x2": vector["x2"] - x_min,
                    "y1": vector["y1"] - y_min,
                    "y2": vector["y2"] - y_min
                }

                records.append([file_name, patch_name, new_vector, label])

data = pd.DataFrame(records, columns=["file_name", "patch_name","vector", "label"])
data

100%|███████████████████████████████████████████████████████████████████████████████| 106/106 [00:00<00:00, 554.97it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 719/719 [00:01<00:00, 662.67it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 696/696 [00:00<00:00, 759.00it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 277/277 [00:00<00:00, 784.70it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 622/622 [00:00<00:00, 771.71it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 625.02it/s]


Unnamed: 0,file_name,patch_name,vector,label
0,BAL 1 Spray 2.svs,BAL 1 Spray 2_12409_2930.png,"{'x1': 253, 'x2': 311, 'y1': 940, 'y2': 998}",Lymohozyten
1,BAL 1 Spray 2.svs,BAL 1 Spray 2_12409_3335.png,"{'x1': -39, 'x2': 43, 'y1': 970, 'y2': 1052}",Makrophagen
2,BAL 1 Spray 2.svs,BAL 1 Spray 2_12409_3335.png,"{'x1': 253, 'x2': 311, 'y1': 535, 'y2': 593}",Lymohozyten
3,BAL 1 Spray 2.svs,BAL 1 Spray 2_12409_3335.png,"{'x1': 403, 'x2': 473, 'y1': 973, 'y2': 1043}",Neutrophile
4,BAL 1 Spray 2.svs,BAL 1 Spray 2_12409_3335.png,"{'x1': 392, 'x2': 470, 'y1': 899, 'y2': 977}",Neutrophile
...,...,...,...,...
125029,BAL cent blue Luft 2.svs,BAL cent blue Luft 2_5157_15876.png,"{'x1': 706, 'x2': 772, 'y1': 191, 'y2': 257}",Neutrophile
125030,BAL cent blue Luft 2.svs,BAL cent blue Luft 2_5157_15876.png,"{'x1': 940, 'x2': 982, 'y1': 468, 'y2': 510}",Neutrophile
125031,BAL cent blue Luft 2.svs,BAL cent blue Luft 2_5157_15876.png,"{'x1': 37, 'x2': 109, 'y1': 696, 'y2': 768}",Mastzellen
125032,BAL cent blue Luft 2.svs,BAL cent blue Luft 2_5157_15876.png,"{'x1': 737, 'x2': 833, 'y1': 92, 'y2': 188}",Mastzellen


In [73]:
# Persist the patch/annotation table in the same directory as the patch images.
train_pickle_path = Path('../TrainPatches/train_patches.pickle')
with train_pickle_path.open('wb') as handle:
    pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [55]:
# Number of distinct patches over all slides (dropna=False mirrors len(unique())).
data["patch_name"].nunique(dropna=False)

1862

In [74]:
# Number of distinct patches exported for this slide.
promyk_patch_names = data.loc[data["file_name"] == 'BAL Promyk Spray 4.svs', "patch_name"]
promyk_patch_names.nunique(dropna=False)

445

In [75]:
# Number of distinct patches exported for this slide.
blickfang_patch_names = data.loc[data["file_name"] == 'BAL AIA Blickfang Luft.svs', "patch_name"]
blickfang_patch_names.nunique(dropna=False)

406