In [2]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
from tqdm import tqdm
import pandas as pd
import numpy as np
from pathlib import Path

# Load annotation versions

In [4]:
def set_additional_fields(frame):
    """Add bounding-box geometry columns derived from the ``vector`` column.

    Every entry of ``frame["vector"]`` is read through its ``x1``, ``x2``,
    ``y1`` and ``y2`` keys. Three columns are written onto ``frame`` itself
    (the input is modified in place, not copied):

    * ``width``  -- ``x2 - x1``
    * ``height`` -- ``y2 - y1``
    * ``scales`` -- ``width / height``

    Returns the same ``frame`` object that was passed in.
    """
    widths, heights = [], []
    for box in frame["vector"]:
        widths.append(box["x2"] - box["x1"])
        heights.append(box["y2"] - box["y1"])

    frame["width"] = widths
    frame["height"] = heights
    frame["scales"] = frame["width"] / frame["height"]

    return frame

In [5]:
species = "Cat"


def _load_annotations(version, derive_geometry=True):
    """Read one annotation pickle for ``species``, print its row count, return it.

    ``version`` selects the file (``../EIPH_<species>_SDATA_<version>_Annotations.pkl``)
    and is also the label used in the printed row count. When
    ``derive_geometry`` is true the frame is passed through
    ``set_additional_fields`` before it is counted and returned.

    NOTE: ``pd.read_pickle`` unpickles the file, so it must only be pointed at
    files from a trusted source.
    """
    annotations = pd.read_pickle(f"../EIPH_{species}_SDATA_{version}_Annotations.pkl")
    if derive_geometry:
        annotations = set_additional_fields(annotations)
    print(f"{version}: {len(annotations)}")
    return annotations


v0 = _load_annotations("Inference")

v_cluster = _load_annotations("Cluster")

v_screening = _load_annotations("Screening")

# The Density version is loaded as stored, without the derived geometry columns.
v_density = _load_annotations("Density", derive_geometry=False)

Inference: 94788
Cluster: 88848
Screening: 20422
Density: 20198


In [6]:
# Preview the first rows of the Inference annotations, including the derived
# width / height / scales columns.
v0.head()

Unnamed: 0,id,image_id,image_set,species,image_name,image_type,grade,vector,unique_identifier,user_id,deleted,last_editor,data_set_name,version,width,height,scales
0,1049539,3512,222,Cat,BAL_Cat_Turnbull blue_1.svs,Turnbull,0,"{'x1': 9903.0, 'x2': 10029.0, 'y1': 5508.0, 'y...",875aab61-3f1f-429a-a053-f87aba6c99da,1,False,1,SDATA,Inference,126.0,129.0,0.976744
1,1049540,3512,222,Cat,BAL_Cat_Turnbull blue_1.svs,Turnbull,0,"{'x1': 26435, 'x2': 26544, 'y1': 24003, 'y2': ...",37e8ad72-ba58-4608-aeab-cfdf004d1f00,1,True,12,SDATA,Inference,109.0,109.0,1.0
2,1049542,3512,222,Cat,BAL_Cat_Turnbull blue_1.svs,Turnbull,0,"{'x1': 18462, 'x2': 18588, 'y1': 10556, 'y2': ...",37f441f4-39ec-478c-9718-c04c4a11c914,1,True,12,SDATA,Inference,126.0,126.0,1.0
3,1049545,3512,222,Cat,BAL_Cat_Turnbull blue_1.svs,Turnbull,0,"{'x1': 23213.0, 'x2': 23359.0, 'y1': 5401.0, '...",74862e27-5881-41d8-90f3-5eb305d35e41,1,True,1,SDATA,Inference,146.0,104.0,1.403846
4,1049548,3512,222,Cat,BAL_Cat_Turnbull blue_1.svs,Turnbull,0,"{'x1': 12243.0, 'x2': 12375.0, 'y1': 14740.0, ...",d718f087-7a46-4e22-8610-ba5f82d04818,1,False,1,SDATA,Inference,132.0,132.0,1.0


## Filter images

In [7]:
# Image names to keep. Each slide number is listed under both file
# extensions: all .svs names first, then all .tiff names.
screened_slide_numbers = (1, 2, 6, 10, 12, 13, 14)
screened = [
    f"BAL_Cat_Turnbull blue_{slide_number}{extension}"
    for extension in (".svs", ".tiff")
    for slide_number in screened_slide_numbers
]


def _only_screened(annotations):
    """Return the rows of ``annotations`` whose ``image_name`` is in ``screened``."""
    return annotations[annotations["image_name"].isin(screened)]


# Restrict every annotation version to the same set of images.
v0 = _only_screened(v0)
v_cluster = _only_screened(v_cluster)
v_screening = _only_screened(v_screening)
v_density = _only_screened(v_density)

In [7]:
# Display the Screening annotations restricted to the images listed in `screened`.
is_screened_image = v_screening["image_name"].isin(screened)
v_screening[is_screened_image]

Unnamed: 0,id,image_id,image_set,species,image_name,image_type,grade,vector,unique_identifier,user_id,deleted,last_editor,data_set_name,version,width,height,scales
0,1049539,3512,222,Cat,BAL_Cat_Turnbull blue_1.svs,Turnbull,0,"{'x1': 9903.0, 'x2': 10029.0, 'y1': 5508.0, 'y...",875aab61-3f1f-429a-a053-f87aba6c99da,1,False,1,SDATA,Screened,126.0,129.0,0.976744
1,1049548,3512,222,Cat,BAL_Cat_Turnbull blue_1.svs,Turnbull,0,"{'x1': 12243.0, 'x2': 12375.0, 'y1': 14740.0, ...",d718f087-7a46-4e22-8610-ba5f82d04818,1,False,1,SDATA,Screened,132.0,132.0,1.000000
2,1049551,3512,222,Cat,BAL_Cat_Turnbull blue_1.svs,Turnbull,0,"{'x1': 20253.0, 'x2': 20366.0, 'y1': 4149.0, '...",7e9cc21f-4a51-4d12-99dd-daffa3e6f368,1,False,1,SDATA,Screened,113.0,114.0,0.991228
3,1049568,3512,222,Cat,BAL_Cat_Turnbull blue_1.svs,Turnbull,0,"{'x1': 2906.0, 'x2': 3030.0, 'y1': 16767.0, 'y...",0a546034-021b-4739-82bf-b09c7a5226d5,1,False,1,SDATA,Screened,124.0,124.0,1.000000
4,1049624,3512,222,Cat,BAL_Cat_Turnbull blue_1.svs,Turnbull,0,"{'x1': 11203.0, 'x2': 11328.0, 'y1': 7867.0, '...",52536c1e-32f3-4a15-9c08-0350c0ad1b2b,1,False,1,SDATA,Screened,125.0,125.0,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20417,2325303,3544,222,Cat,BAL_Cat_Turnbull blue_14.svs,Turnbull,1,"{'x1': 18334, 'x2': 18458, 'y1': 27196, 'y2': ...",cb298f85-3464-401c-a587-977c3a7db8ed,12,False,12,SDATA,Screened,124.0,145.0,0.855172
20418,2325306,3544,222,Cat,BAL_Cat_Turnbull blue_14.svs,Turnbull,1,"{'x1': 17441, 'x2': 17563, 'y1': 27776, 'y2': ...",0a92e74c-8988-4437-a34f-d14acfc27287,12,False,12,SDATA,Screened,122.0,127.0,0.960630
20419,2325308,3544,222,Cat,BAL_Cat_Turnbull blue_14.svs,Turnbull,1,"{'x1': 17036, 'x2': 17168, 'y1': 26942, 'y2': ...",6fa9e0ef-c84a-47d4-8cae-b004b72b7a1c,12,False,12,SDATA,Screened,132.0,123.0,1.073171
20420,778606,3544,222,Cat,BAL_Cat_Turnbull blue_14.svs,Turnbull,2,"{'x1': 25225, 'x2': 25335, 'y1': 7969, 'y2': 8...",f436d337-cfe7-42b6-9ffe-c9349b7731f4,1,False,12,SDATA,Screened,110.0,116.0,0.948276


## Global Stats

In [8]:
def get_global_stats(frame, dataset, version, species):
    """Summarise the grade distribution of one annotation version as a one-row frame.

    Parameters
    ----------
    frame : pandas.DataFrame
        Annotations; the ``image_name`` and ``grade`` columns are read.
    dataset, version, species
        Labels copied unchanged into the result row.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns ``species, dataset, slides, version,
        total, score, 0, 1, 2, 3, 4, mean, SD``:

        * ``slides`` -- number of distinct ``image_name`` values
        * ``total``  -- number of rows in ``frame``
        * ``score``  -- mean grade times 100, truncated to an integer
        * ``0`` .. ``4`` -- number of rows with that grade
        * ``mean`` / ``SD`` -- mean and standard deviation of ``grade``

    For a frame without rows the mean is undefined; ``score`` is then NaN
    (like ``mean`` and ``SD``) instead of raising ``ValueError`` from
    ``int(NaN)``.
    """
    grades = frame["grade"]

    nr_slides = len(frame["image_name"].unique())
    total = len(frame)
    mean = grades.mean()
    SD = grades.std()

    # int() cannot convert NaN, which is what mean() yields for an empty frame.
    score = float("nan") if pd.isna(mean) else int(mean * 100)

    # One count per grade 0..4, in column order.
    grade_counts = [int((grades == grade).sum()) for grade in range(5)]

    row = [species, dataset, nr_slides, version, total, score, *grade_counts, mean, SD]
    columns = ["species", "dataset", "slides", "version", "total", "score",
               "0", "1", "2", "3", "4", "mean", "SD"]

    return pd.DataFrame([row], columns=columns)

In [9]:
# One single-row summary per annotation version, all labelled with the
# "SDATA" dataset and the current species.
v0_meta, v_cluster_meta, v_screening_meta, v_density_meta = [
    get_global_stats(annotations, "SDATA", version, species)
    for annotations, version in (
        (v0, "Inference"),
        (v_cluster, "Cluster"),
        (v_screening, "Screening"),
        (v_density, "Density"),
    )
]

# Stack the summaries; each keeps its own index label, so the index repeats.
metaDf = pd.concat([v0_meta, v_cluster_meta, v_screening_meta, v_density_meta])
metaDf.head()

Unnamed: 0,species,dataset,slides,version,total,score,0,1,2,3,4,mean,SD
0,Cat,SDATA,7,Inference,94788,38,58879,35659,122,8,120,0.384089,0.504816
0,Cat,SDATA,7,Cluster,88848,38,54867,33868,103,5,5,0.383903,0.489751
0,Cat,SDATA,7,Screening,20422,33,13631,6747,41,2,1,0.334884,0.477429
0,Cat,SDATA,7,Density,20198,45,11124,9039,35,0,0,0.450985,0.501075


## Stain 

In [10]:
"BerlinerBlaue: {}".format(len([name for name in v0["image_name"].unique() if "erliner" in name]))

'BerlinerBlaue: 0'

In [11]:
"TurnbullBlaue: {}".format(len([name for name in v0["image_name"].unique() if "erliner" not in name]))

'TurnbullBlaue: 7'

## Introduced new cells?

In [12]:
# NOTE(review): hard-coded value -- this cell contains no computation that
# produces it. Confirm the figure or replace it with the code that derives it.
188

188

## Images

In [13]:
# Distinct image names present in the Inference annotations.
inference_image_names = v0["image_name"]
inference_image_names.unique()

array(['BAL_Cat_Turnbull blue_1.svs', 'BAL_Cat_Turnbull blue_2.svs',
       'BAL_Cat_Turnbull blue_6.svs', 'BAL_Cat_Turnbull blue_10.svs',
       'BAL_Cat_Turnbull blue_12.svs', 'BAL_Cat_Turnbull blue_13.svs',
       'BAL_Cat_Turnbull blue_14.svs'], dtype=object)

In [14]:
# Number of distinct image names in the Inference annotations.
distinct_image_names = v0["image_name"].unique()
len(distinct_image_names)

7

## Grade changes between Screening and Density

In [8]:
# Map each annotation's unique_identifier to its grade, once per version.
# If an identifier occurs more than once, the last occurrence wins.
screening_changes = dict(zip(v_screening["unique_identifier"], v_screening["grade"]))

density_changes = dict(zip(v_density["unique_identifier"], v_density["grade"]))

In [17]:
total_changes = 0
grade_one_error = 0
up_changes = {}
down_changes = {}

for source_uuid, source_grade in screening_changes.items():

    # Only annotations present in both versions can be compared.
    if source_uuid not in density_changes:
        continue

    density_grade = density_changes[source_uuid]
    if source_grade == density_grade:
        continue

    # Key such as "0 -> 1": Screening grade on the left, Density grade on the right.
    transition = f"{source_grade} -> {density_grade}"

    if source_grade < density_grade:
        up_changes[transition] = up_changes.get(transition, 0) + 1
        total_changes += 1
    elif source_grade > density_grade:
        down_changes[transition] = down_changes.get(transition, 0) + 1
        total_changes += 1

    # Changes of exactly one grade step, in either direction.
    if abs(density_grade - source_grade) == 1:
        grade_one_error += 1

total_changes

2957

In [18]:
# Number of compared annotations whose Screening and Density grades differ
# by exactly one.
grade_one_error

2956

In [10]:
# Counts per "screening -> density" transition where the Density grade is higher.
up_changes

{'0 -> 1': 2654, '1 -> 2': 25}

In [15]:
f"Total Up: {sum(up_changes.values())}" 

'Total Up: 2679'

In [11]:
# Counts per "screening -> density" transition where the Density grade is lower.
down_changes

{'4 -> 0': 1, '1 -> 0': 252, '2 -> 1': 24, '3 -> 2': 1}

In [16]:
f"Total Down: {sum(down_changes.values())}" 

'Total Down: 278'