In [1]:
# (Re)load IPython's autoreload extension; mode 2 re-imports modules before
# each cell is executed.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
from tqdm import tqdm
import pandas as pd
import numpy as np
from pathlib import Path

# Load annotation versions

In [3]:
# Read the three annotation versions first, then report how many rows each holds.
# NOTE(review): unpickling can execute arbitrary code; only load these files
# when they come from a trusted source.
v0 = pd.read_pickle("../EIPH_Equine_SREP_Annotations.pkl")
v_screening = pd.read_pickle("../EIPH_SREP_Screening_Annotations.pkl")
v_density = pd.read_pickle("../EIPH_SREP_Density_Annotations.pkl")

print(f"Initial: {len(v0)}")
print(f"Screening: {len(v_screening)}")
print(f"Density: {len(v_density)}")

Initial: 78081
Screening: 60316
Density: 59318


In [4]:
# Preview the first rows of the initial annotation version.
v0.head()

Unnamed: 0,id,image_id,image_set,species,image_name,image_type,grade,vector,unique_identifier,user_id,deleted,last_editor,data_set_name,version
0,241301,231,15,equine,05_EIPH_569923 Berliner Blau.svs,Prussian,0,"{'x1': 24170, 'x2': 24312, 'y1': 2791, 'y2': 2...",671c255e-fe6b-4e39-9d9f-5e0c97a76b7a,11,True,1,SREP,SREP
1,241302,231,15,equine,05_EIPH_569923 Berliner Blau.svs,Prussian,0,"{'x1': 19487, 'x2': 19591, 'y1': 7877, 'y2': 7...",dfd15c9e-e166-4bfc-b3e9-8d1faeecc5d4,11,False,1,SREP,SREP
2,241304,231,15,equine,05_EIPH_569923 Berliner Blau.svs,Prussian,0,"{'x1': 19907, 'x2': 20039, 'y1': 7912, 'y2': 8...",e599de1e-7d9a-41e1-9017-31f2088d4b90,11,False,1,SREP,SREP
3,241305,231,15,equine,05_EIPH_569923 Berliner Blau.svs,Prussian,0,"{'x1': 20235, 'x2': 20313, 'y1': 8318, 'y2': 8...",08b33057-c60c-4ff1-a6ca-b3878e2b0647,11,True,1,SREP,SREP
4,241306,231,15,equine,05_EIPH_569923 Berliner Blau.svs,Prussian,0,"{'x1': 20429, 'x2': 20545, 'y1': 8110, 'y2': 8...",62e738d9-6ca9-4962-8c26-f2f518c67bef,11,False,1,SREP,SREP


## Filter images

In [5]:
# File names of the slide images to exclude from every annotation version.
filter_images = [
    '08_EIPH_574999 R Turnbull blue.svs',
    '08_EIPH_574999 R Turnbull blue.tiff',
]

# Keep only the rows whose image_name is not in the exclusion list.
v0 = v0.loc[~v0["image_name"].isin(filter_images)]
v_screening = v_screening.loc[~v_screening["image_name"].isin(filter_images)]
v_density = v_density.loc[~v_density["image_name"].isin(filter_images)]

## Global Stats

In [20]:
def get_global_stats(frame, dataset, version, species):
    """Summarise the grade distribution of one annotation version.

    Parameters
    ----------
    frame : pandas.DataFrame
        Annotation rows; the ``image_name`` and ``grade`` columns are read.
    dataset, version, species
        Labels copied unchanged into the result row.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns ``species``, ``dataset``, ``slides``
        (number of distinct ``image_name`` values), ``version``, ``total``
        (row count), ``score`` (mean grade * 100, truncated to an integer),
        ``"0"`` .. ``"4"`` (rows per grade), ``mean`` and ``SD`` (pandas'
        default sample standard deviation of the grades).

    Notes
    -----
    When the mean grade is undefined (empty frame or only missing grades)
    ``score`` is NaN instead of raising ``ValueError`` from ``int(nan)``.
    """
    grades = frame["grade"]

    nr_slides = len(frame["image_name"].unique())
    total = frame.shape[0]

    mean = grades.mean()
    # int() truncates toward zero; it cannot convert NaN, so guard that case.
    score = int(mean * 100) if pd.notna(mean) else float("nan")

    # Number of rows for each of the grades 0..4, in column order.
    grade_counts = [int((grades == grade).sum()) for grade in range(5)]

    row = [species, dataset, nr_slides, version, total, score, *grade_counts, mean, grades.std()]
    columns = ["species", "dataset", "slides", "version", "total", "score",
               "0", "1", "2", "3", "4", "mean", "SD"]

    return pd.DataFrame([row], columns=columns)

In [22]:
# One summary row per annotation version, all labelled as the SREP dataset / equine species.
v0_meta = get_global_stats(v0, "SREP", "SREP", "Equine")
v_screening_meta = get_global_stats(v_screening, "SREP", "Screening", "Equine")
v_density_meta = get_global_stats(v_density, "SREP", "Density", "Equine")

# ignore_index=True renumbers the stacked rows 0..2; without it each row keeps
# the label 0 of its one-row frame, so label-based lookups become ambiguous.
metaDf = pd.concat([v0_meta, v_screening_meta, v_density_meta], ignore_index=True)
metaDf.head()

Unnamed: 0,species,dataset,slides,version,total,score,0,1,2,3,4,mean,SD
0,Equine,SREP,16,SREP,77004,102,29017,26810,13178,6577,1422,1.020531,1.027324
0,Equine,SREP,16,Screening,59954,112,19733,21545,11442,5963,1271,1.124229,1.045563
0,Equine,SREP,16,Density,58956,109,19246,21595,11829,5552,734,1.099888,1.002928


## Stain

In [26]:
# Count the distinct image names containing "erliner"; leaving out the first
# letter matches both the "Berliner" and the "berliner" spelling.
"BerlinerBlaue: {}".format(sum(1 for name in v0["image_name"].unique() if "erliner" in name))

'BerlinerBlaue: 10'

In [27]:
# Count every distinct image name that does not contain "erliner".
"TurnbullBlaue: {}".format(sum(1 for name in v0["image_name"].unique() if "erliner" not in name))

'TurnbullBlaue: 6'

## Animals

In [9]:
# After turning spaces into "_", the third "_"-separated token of each image
# name is collected into a set of distinct values.
unique_names = {
    name.replace(" ", "_").split("_")[2]
    for name in v0["image_name"].unique()
}
unique_names

{'563417',
 '563479',
 '566481',
 '566482',
 '566933',
 '567017',
 '568354',
 '568381',
 '569923',
 '569948',
 '571557',
 '574162',
 '574882',
 '575216',
 '575697',
 '576150'}

In [10]:
# Number of distinct identifiers extracted from the image names.
len(unique_names)

16

## Introduced new cells?

In [28]:
# Shape of the screening rows whose user_id is 12.
# NOTE(review): presumably user 12 is the annotator who added cells during
# screening - confirm what this id refers to.
v_screening.loc[v_screening["user_id"] == 12].shape

(30, 14)

## Images

In [30]:
# List the distinct image names remaining in the initial annotation version.
v0["image_name"].unique()

array(['05_EIPH_569923 Berliner Blau.svs',
       '11_EIPH_575697 Berliner Blau.svs',
       '02_EIPH_574162 berliner blau.svs',
       '26_EIPH_566482 L Berliner Blau.svs',
       '31_EIPH_568354 Berliner Blau.svs',
       '04_EIPH_567017 Berliner Blau.svs',
       '27_EIPH_571557 berliner blau.svs',
       '01_EIPH_563479 Berliner Blau.svs',
       '03_EIPH_566933 R Berliner Blau neu.svs',
       '20_EIPH_576150 berliner blau.svs',
       '07_EIPH_574882 left lung Turnbull blue.svs',
       '09_EIPH_563417 L Turnbull blue.svs',
       '14_EIPH_568381 Turnbull blue.svs',
       '22_EIPH_575216 Turnbull blue.svs',
       '28_EIPH_569948 L Turnbull blue.svs',
       '29_EIPH_566481 L Turnbull blue.svs'], dtype=object)

In [31]:
# Number of distinct image names (length of the one-dimensional unique array).
v0["image_name"].unique().size

16

## Density changes

In [7]:
# Lookup tables from an annotation's unique identifier to its grade, one per
# version. If an identifier occurs more than once, the last grade seen wins.
screening_changes = dict(zip(v_screening["unique_identifier"], v_screening["grade"]))

density_changes = dict(zip(v_density["unique_identifier"], v_density["grade"]))

In [8]:
# Compare the grade of every screening annotation with the density grade that
# shares its unique identifier and tally each "old -> new" transition.
total_changes = 0
grade_one_error = 0  # transitions whose two grades differ by exactly one
up_changes, down_changes = {}, {}

for source_uuid, source_grade in screening_changes.items():

    # Identifiers without a density entry cannot be compared.
    if source_uuid not in density_changes:
        continue

    density_grade = density_changes[source_uuid]
    if source_grade == density_grade:
        continue

    name = f"{source_grade} -> {density_grade}"

    if source_grade < density_grade:
        up_changes[name] = up_changes.get(name, 0) + 1
        total_changes += 1
    elif source_grade > density_grade:
        down_changes[name] = down_changes.get(name, 0) + 1
        total_changes += 1

    if abs(density_grade - source_grade) == 1:
        grade_one_error += 1

total_changes

7798

In [9]:
# Number of changed annotations whose grade moved by exactly one step.
grade_one_error

7789

In [10]:
# Transition counts where the density grade is higher than the screening grade.
up_changes

{'0 -> 1': 1346,
 '1 -> 2': 1139,
 '2 -> 3': 585,
 '3 -> 4': 87,
 '0 -> 3': 1,
 '0 -> 2': 4,
 '1 -> 3': 1}

In [11]:
# Sum of all upward transition counts.
f"Total Up: {sum(up_changes.values())}" 

'Total Up: 3163'

In [12]:
# Transition counts where the density grade is lower than the screening grade.
down_changes

{'1 -> 0': 1262,
 '2 -> 1': 1427,
 '3 -> 2': 1395,
 '4 -> 3': 548,
 '3 -> 1': 1,
 '2 -> 0': 2}

In [13]:
# Sum of all downward transition counts.
f"Total Down: {sum(down_changes.values())}" 

'Total Down: 4635'