In [1]:
# (Re)load the autoreload extension and select mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
from tqdm import tqdm
import pandas as pd
import numpy as np
from pathlib import Path

## Load annotation versions

In [3]:
species = "Equine"

# One pickle per annotation version, all read relative to the parent directory.
# NOTE(review): pd.read_pickle executes arbitrary code embedded in the file;
# only load pickles from a trusted source.
annotation_files = {
    "Inference": f"../EIPH_{species}_SDATA_Inference_Annotations.pkl",
    "Screening": f"../EIPH_{species}_SDATA_Screening_Annotations.pkl",
    "Density": f"../EIPH_{species}_SDATA_Density_Annotations.pkl",
}

# Load each version and report its row count right after it is read.
v0 = pd.read_pickle(annotation_files["Inference"])
print(f"Inference: {len(v0)}")

v_screening = pd.read_pickle(annotation_files["Screening"])
print(f"Screening: {len(v_screening)}")

v_density = pd.read_pickle(annotation_files["Density"])
print(f"Density: {len(v_density)}")

Inference: 255074
Screening: 171955
Density: 167876


In [4]:
# Preview the first five inference annotations.
# NOTE(review): the preview shows a `deleted` flag that is True for some rows,
# and no visible cell filters on it - confirm that deleted annotations are
# meant to be included in the statistics below.
v0.head(n=5)

Unnamed: 0,id,image_id,image_set,species,image_name,image_type,grade,vector,unique_identifier,user_id,deleted,last_editor,data_set_name,version
0,1433998,3530,246,equine,07_EIPH_574882 Berliner Blau-001.svs,Prussian,0,"{'x1': 22691, 'x2': 22782, 'y1': 9025, 'y2': 9...",927cade0-4579-48e1-a884-a07d88594a0a,1,True,1,SDATA,Inference
1,1433999,3530,246,equine,07_EIPH_574882 Berliner Blau-001.svs,Prussian,0,"{'x1': 32197, 'x2': 32338, 'y1': 15975, 'y2': ...",cafc4aa3-324f-4015-9451-ad7e5bee53f3,1,False,1,SDATA,Inference
2,1434000,3530,246,equine,07_EIPH_574882 Berliner Blau-001.svs,Prussian,0,"{'x1': 23069, 'x2': 23180, 'y1': 11514, 'y2': ...",979ef20e-6228-4cb3-ac9a-aecea6ad75c1,1,False,1,SDATA,Inference
3,1434001,3530,246,equine,07_EIPH_574882 Berliner Blau-001.svs,Prussian,0,"{'x1': 9045, 'x2': 9148, 'y1': 9711, 'y2': 9814}",ef87d780-5e2e-413e-a2ff-0712f6aa1532,1,False,1,SDATA,Inference
4,1434002,3530,246,equine,07_EIPH_574882 Berliner Blau-001.svs,Prussian,0,"{'x1': 10538, 'x2': 10645, 'y1': 10306, 'y2': ...",0f141dc4-5589-4e5a-a504-d77e7ad94683,1,False,1,SDATA,Inference


## Filter images

In [4]:
# Slides excluded from every annotation version (same slide, two file formats).
filter_images = ['26_EIPH_566482 L Turnbull blue.svs', '26_EIPH_566482 L Turnbull blue.tiff']

# Apply the same exclusion to all three versions in one pass; the right-hand
# tuple is built from the current frames before any name is rebound.
v0, v_screening, v_density = (
    annotations[~annotations["image_name"].isin(filter_images)]
    for annotations in (v0, v_screening, v_density)
)

## Global Stats

In [6]:
def get_global_stats(frame, dataset, version, species, grade_levels=(0, 1, 2, 3, 4)):
    """Summarise one annotation version as a single-row DataFrame.

    Parameters
    ----------
    frame : pd.DataFrame
        Annotations; must provide the ``image_name`` and ``grade`` columns.
    dataset, version, species
        Labels copied unchanged into the output row.
    grade_levels : iterable, optional
        Grade values that are counted individually. Each one becomes an
        output column named ``str(grade)``. Defaults to grades 0 through 4.

    Returns
    -------
    pd.DataFrame
        One row with the columns ``species``, ``dataset``, ``slides``,
        ``version``, ``total``, ``score``, one column per grade level,
        ``mean`` and ``SD``. ``slides`` is the number of distinct
        ``image_name`` values, ``total`` the number of rows, and ``score``
        the mean grade times 100 truncated by ``int()``. When the mean is
        undefined (empty frame or only missing grades) ``score`` is NaN.
    """
    grades = frame["grade"]
    mean = grades.mean()

    # int(NaN) raises ValueError, so an empty (e.g. fully filtered) frame would
    # crash the summary; report an undefined score as NaN instead.
    score = float("nan") if pd.isna(mean) else int(mean * 100)

    # Insertion order of this dict defines the output column order.
    row = {
        "species": species,
        "dataset": dataset,
        # dropna=False keeps parity with len(Series.unique()), which counts NaN.
        "slides": frame["image_name"].nunique(dropna=False),
        "version": version,
        "total": len(frame),
        "score": score,
    }
    for level in grade_levels:
        row[str(level)] = int((grades == level).sum())
    row["mean"] = mean
    row["SD"] = grades.std()

    return pd.DataFrame([row], columns=list(row))

In [7]:
# Summarise each annotation version with the same dataset and species labels.
v0_meta = get_global_stats(v0, "SDATA", "Inference", species)
v_screening_meta = get_global_stats(v_screening, "SDATA", "Screening", species)
v_density_meta = get_global_stats(v_density, "SDATA", "Density", species)

# Every summary is a one-row frame indexed 0; ignore_index=True renumbers the
# stacked rows so index labels are unique and .loc[label] selects one row.
metaDf = pd.concat([v0_meta, v_screening_meta, v_density_meta], ignore_index=True)
metaDf.head()

Unnamed: 0,species,dataset,slides,version,total,score,0,1,2,3,4,mean,SD
0,Equine,SDATA,39,Inference,245397,95,97904,80715,47789,17437,1552,0.956866,0.965915
0,Equine,SDATA,39,Screening,168333,108,54432,60189,39316,13404,992,1.087137,0.959626
0,Equine,SDATA,39,Density,164365,101,51797,67798,36339,7810,621,1.01232,0.871861


## Stain

In [9]:
# Count slides whose file name contains "erliner"; dropping the first letter
# lets the check cover both the "Berliner" and "berliner" spellings.
"BerlinerBlaue: {}".format(
    sum(1 for name in v0["image_name"].unique() if "erliner" in name)
)

'BerlinerBlaue: 18'

In [10]:
# Count every slide whose file name does NOT contain "erliner".
# NOTE(review): this treats all non-Berliner slides as Turnbull - confirm no
# other stain occurs in the data.
"TurnbullBlaue: {}".format(
    sum(1 for name in v0["image_name"].unique() if "erliner" not in name)
)

'TurnbullBlaue: 21'

## Animals

In [9]:
# Third "_"-separated token after spaces are turned into underscores,
# e.g. '07_EIPH_574882 Berliner Blau-001.svs' -> '574882'.
unique_names = {
    name.replace(" ", "_").split("_")[2]
    for name in v0["image_name"].unique()
}
unique_names

{'563417',
 '563476',
 '563479',
 '566481',
 '566482',
 '566933',
 '567017',
 '568150',
 '568320',
 '568354',
 '568355',
 '568381',
 '569923',
 '569948',
 '570370',
 '571557',
 '574162',
 '574882',
 '574999',
 '575216',
 '575697',
 '575796',
 '576150',
 '576172',
 '576255',
 '588355'}

In [10]:
# Number of distinct identifiers extracted from the image names.
len(unique_names)

26

## Introduced new cells?

In [11]:
# (rows, columns) of the screening annotations whose user_id equals 12.
# NOTE(review): what user 12 represents is not shown in this notebook -
# consider a named constant.
v_screening.loc[v_screening["user_id"] == 12].shape

(150, 14)

## Images

In [12]:
# Distinct image file names present in the inference annotations.
v0["image_name"].unique()

array(['07_EIPH_574882 Berliner Blau-001.svs',
       '08_EIPH_574999 Berliner Blau.svs',
       '09_EIPH_563417 L berliner blau.svs',
       '10_EIPH_563417 R Berliner Blau.svs',
       '13_EIPH_570370 Berliner Blau.svs',
       '14_EIPH_568381 berliner blau-001.svs',
       '15_EIPH_568320 berliner blau.svs',
       '16_EIPH_574882 R Berliner Blau.svs',
       '17_EIPH_575796 Berliner Blau.svs',
       '18_EIPH_574882 R Berliner Blau.svs',
       '19_EIPH_566933 L Berliner Blau.svs',
       '22_EIPH_575216 Berliner Blau.svs',
       '23_EIPH_563476 Berliner Blau-001.svs',
       '24_EIPH_576255 Berliner Blau.svs',
       '25_EIPH_568150 Berliner Blau.svs',
       '28_EIPH_569948 L berliner blau.svs',
       '29_EIPH_566481 L Berliner Blau.svs',
       '30_EIPH_588355 Berliner Blau.svs',
       '01_EIPH_563479 Turnbull blue.svs',
       '02_EIPH_574162 Turnbull blue-001.svs',
       '03_EIPH_566933 R Turnbull blue.svs',
       '04_EIPH_567017 Turnbull blue-001.svs',
       '05_EIPH_56

In [13]:
# Number of distinct image file names in the inference annotations.
v0["image_name"].unique().size

39

In [None]:
# NOTE(review): unexecuted leftover cell holding a bare literal; it has no
# effect on the analysis and is a candidate for removal.
3