## Evaluation of expert consistency

The individual annotations of the five experts are stored in the folder 'databases'

In this notebook we read all individual annotations and match them according to the coordinates.
A match is defined by a proximity of 25 pixels.


In [1]:
from SlideRunner.dataAccess.database import Database

files = [f'databases/BiMulti_VP{k+1}.sqlite' for k in range(6)]

files_GT_DB = ['databases/MITOS_WSI_CCMCT_ODAEL_BiMulti_10HPF.sqlite']


The class "Annotations" is used to self-generate a ground truth out of the votes. However, it also supports adding manual ground truth (add_GT).

In [2]:
from uuid import uuid4
import numpy as np

        

class Annotations:
    def __init__(self, gtcount, initcoordinates={}, initvotes={}):
        self.coordinates = {}
        self.votes = {}
        self.gtcount = gtcount
        self.matchingRadius = 25
        self.override_GT = None
    
    def TP(self, expertId):
        gta = self.GTannotations()
        TP = [k for k,v in zip(self.votes.keys(), self.votes.values()) if expertId in v and k in gta.coordinates.keys()]
        return len(TP)

    def FP(self, expertId):
        gta = self.GTannotations()
        FP = [k for k,v in zip(self.votes.keys(), self.votes.values()) if expertId in v and k not in gta.coordinates.keys()]
        return len(FP)

    def FN(self, expertId):
        gta = self.GTannotations()
        FN = [k for k,v in zip(self.votes.keys(), self.votes.values()) if expertId not in v and k in gta.coordinates.keys()]
        return len(FN)

    def MC(self, expertId):
        MC = [k for k,v in zip(self.votes.keys(), self.votes.values()) if expertId in v]
        return len(MC)
    
    def GTannotations(self) -> "Annotations":
        if self.override_GT is not None: return self.override_GT
        idxs = [k for k,v in zip(self.votes.keys(),self.votes.values()) if len(v)>=self.gtcount]
        lens = {k:len(v) for k,v in zip(self.votes.keys(),self.votes.values())}
        initcoordinates = {k: v for k,v in zip(self.coordinates.keys(),self.coordinates.values()) if k in idxs}
        initvotes =  {k:v for k,k in zip(self.votes.keys(),self.votes.values()) if k in idxs}
        a2 = Annotations(self.gtcount)
        a2.coordinates=initcoordinates
        a2.votes = initvotes
        return a2
    
    def add(self, coordinates:tuple, expertId:int):
        co = self.coordinates.values()

        matching = [((c[0]-coordinates[0])**2)+((c[1]-coordinates[1])**2)<self.matchingRadius**2 for c in co]
        if (np.sum(matching)==0):
            # not found, create new entry
            u = uuid4()
            self.coordinates[u] = coordinates
            self.votes[u] = [expertId]
        else:
            match = np.where(matching)[0][0]
            u = list(self.coordinates.keys())[match]
            self.votes[u].append(expertId)

    def add_GT(self, coordinates:tuple, expertId:int):
        if (self.override_GT) is None:
            self.override_GT = Annotations(1)
            # Now overwriting GT annotations
            
        co = self.coordinates.values()

        matching = [((c[0]-coordinates[0])**2)+((c[1]-coordinates[1])**2)<self.matchingRadius**2 for c in co]
        if (np.sum(matching)==0):
            # not found, create new entry
            u = uuid4()
            self.override_GT.coordinates[u] = coordinates
            self.override_GT.votes[u] = [expertId]
        else:
            match = np.where(matching)[0][0]
            u = list(self.coordinates.keys())[match]
            self.override_GT.votes[u] = [expertId]
            self.override_GT.coordinates[u] = self.coordinates[u]
    
    def __repr__(self) -> str:
        return f'<Annotations> object with {len(self.coordinates.keys())} annotations.'

### Assemble Ground Truth for two-expert vote

In [3]:
slideAnnos_bi = {}
slideAnnos_multi = {}

for file in files:
    DB = Database().open(file)
    print(DB.getAllClasses())
    multiclass = [id for (name,id,col) in DB.getAllClasses() if 'ulti' in name]
    biclass = [id for (name,id,col) in DB.getAllClasses() if 'ulti' not in name]
    if (len(biclass)>1):
        raise(Error, 'Multiple classes for bi found')

    if (len(multiclass)>1):
        raise(Error, 'Multiple classes for multi found')
        
    for dbid,fname in DB.listOfSlides():
        if fname not in slideAnnos_bi:
            slideAnnos_bi[fname] = Annotations(gtcount=2)
        if fname not in slideAnnos_multi:
            slideAnnos_multi[fname] = Annotations(gtcount=2)
        DB.loadIntoMemory(dbid)
        for anno in DB.annotations:
            if not DB.annotations[anno].deleted and DB.annotations[anno].agreedClass==biclass[0]:
                slideAnnos_bi[fname].add([DB.annotations[anno].x1,DB.annotations[anno].y1], file)
            if not DB.annotations[anno].deleted and DB.annotations[anno].agreedClass==multiclass[0]:
                slideAnnos_multi[fname].add([DB.annotations[anno].x1,DB.annotations[anno].y1], file)

                
                
GTBiNC={}
for k in slideAnnos_bi.keys():
    GTBiNC[k] = len(slideAnnos_bi[k].GTannotations().coordinates)
    
GTMuNC={}
for k in slideAnnos_multi.keys():
    GTMuNC[k] = len(slideAnnos_multi[k].GTannotations().coordinates)
    
    

[('Binucleated', 1, '#00aa00'), ('Multinucleated', 2, '#0000ff')]
[('Bi-nucleated', 1, '#73d216'), ('Multi-nucleated', 2, '#f5ac99')]
[('Binucleated', 1, '#2d2e3c'), ('Multinucleated', 2, '#91c5ba')]
[('Binucleated', 1, '#73d216'), ('Multinucleated', 2, '#5fbbc2')]
[('Binucleate cells', 1, '#c7b326'), ('Multinucleate cells', 2, '#afd8ba')]
[('Binucleated', 1, '#8be720'), ('Multinucleated', 2, '#7e4cc1')]


### Store two-expert vote in database

In [4]:
DB_MPGT = Database().create('databases/BiMulti_MultiPathologistGT_twoExpertConsensus.sqlite')
DB_MPGT.insertAnnotator('Expert consensus')
DB_MPGT.insertClass('Binucleated')
DB_MPGT.insertClass('Multinucleated')


for k in slideAnnos_bi.keys():
    sluid = DB_MPGT.insertNewSlide(k,'')
    for (x,y) in slideAnnos_bi[k].GTannotations().coordinates.values():
        DB_MPGT.insertNewSpotAnnotation(x,y,sluid,1,1)

    for (x,y) in slideAnnos_multi[k].GTannotations().coordinates.values():
        DB_MPGT.insertNewSpotAnnotation(x,y,sluid,2,1)

DB_MPGT.db.close()

### Binucleated

In [5]:
F1s = []
for author in files:
    TP,FN,FP = 0,0,0
    
    for slides in slideAnnos_bi.keys():
        TP += slideAnnos_bi[slides].TP(author)
        FP += slideAnnos_bi[slides].FP(author)
        FN += slideAnnos_bi[slides].FN(author)
    
    F1 = 2*TP/(2*TP+FP+FN+0.00001)
    F1s.append(F1)

    print(f'{author}: TP:{TP}, FP:{FP}, FN:{FN}, F1:{F1}')
    
print('Total positives binucleated:',TP+FN)
print('Median F1: ',np.median(F1s))


databases/BiMulti_VP1.sqlite: TP:134, FP:64, FN:141, F1:0.5665961825243936
databases/BiMulti_VP2.sqlite: TP:78, FP:18, FN:197, F1:0.42048516386832446
databases/BiMulti_VP3.sqlite: TP:126, FP:126, FN:149, F1:0.478178359047849
databases/BiMulti_VP4.sqlite: TP:98, FP:80, FN:177, F1:0.43267107212646644
databases/BiMulti_VP5.sqlite: TP:240, FP:424, FN:35, F1:0.5111821031822993
databases/BiMulti_VP6.sqlite: TP:71, FP:36, FN:204, F1:0.3717277389600069
Total positives binucleated: 275
Median F1:  0.4554247155871577


### Multinucleated

In [6]:
F1s = []
for author in files:
    TP,FN,FP = 0,0,0
    
    for slides in slideAnnos_multi.keys():
        TP += slideAnnos_multi[slides].TP(author)
        FP += slideAnnos_multi[slides].FP(author)
        FN += slideAnnos_multi[slides].FN(author)
    
    F1 = 2*TP/(2*TP+FP+FN+0.00001)
    F1s.append(F1)

    print(f'{author}: TP:{TP}, FP:{FP}, FN:{FN}, F1:{F1}')
    
print('Total positives multinucleated:',TP+FN)
print('Median F1: ',np.median(F1s))


databases/BiMulti_VP1.sqlite: TP:25, FP:8, FN:32, F1:0.5555554938271673
databases/BiMulti_VP2.sqlite: TP:27, FP:10, FN:30, F1:0.5744680239927634
databases/BiMulti_VP3.sqlite: TP:33, FP:48, FN:24, F1:0.47826083490863514
databases/BiMulti_VP4.sqlite: TP:26, FP:10, FN:31, F1:0.5591397248236855
databases/BiMulti_VP5.sqlite: TP:42, FP:35, FN:15, F1:0.6268656248607742
databases/BiMulti_VP6.sqlite: TP:20, FP:34, FN:37, F1:0.36036032789546596
Total positives multinucleated: 57
Median F1:  0.5573476093254264


## Majority vote (GT count = 3)

In [7]:
slideAnnos_bi = {}
slideAnnos_multi = {}

for file in files:
    DB = Database().open(file)
    print(DB.getAllClasses())
    multiclass = [id for (name,id,col) in DB.getAllClasses() if 'ulti' in name]
    biclass = [id for (name,id,col) in DB.getAllClasses() if 'ulti' not in name]
    if (len(biclass)>1):
        raise(Error, 'Multiple classes for bi found')

    if (len(multiclass)>1):
        raise(Error, 'Multiple classes for multi found')
        
    for dbid,fname in DB.listOfSlides():
        if fname not in slideAnnos_bi:
            slideAnnos_bi[fname] = Annotations(gtcount=3)
        if fname not in slideAnnos_multi:
            slideAnnos_multi[fname] = Annotations(gtcount=3)
        DB.loadIntoMemory(dbid)
        for anno in DB.annotations:
            if not DB.annotations[anno].deleted and DB.annotations[anno].agreedClass==biclass[0]:
                slideAnnos_bi[fname].add([DB.annotations[anno].x1,DB.annotations[anno].y1], file)
            if not DB.annotations[anno].deleted and DB.annotations[anno].agreedClass==multiclass[0]:
                slideAnnos_multi[fname].add([DB.annotations[anno].x1,DB.annotations[anno].y1], file)

                
                
GTBiNC={}
for k in slideAnnos_bi.keys():
    GTBiNC[k] = len(slideAnnos_bi[k].GTannotations().coordinates)
    
GTMuNC={}
for k in slideAnnos_multi.keys():
    GTMuNC[k] = len(slideAnnos_multi[k].GTannotations().coordinates)
    
    

[('Binucleated', 1, '#00aa00'), ('Multinucleated', 2, '#0000ff')]
[('Bi-nucleated', 1, '#73d216'), ('Multi-nucleated', 2, '#f5ac99')]
[('Binucleated', 1, '#2d2e3c'), ('Multinucleated', 2, '#91c5ba')]
[('Binucleated', 1, '#73d216'), ('Multinucleated', 2, '#5fbbc2')]
[('Binucleate cells', 1, '#c7b326'), ('Multinucleate cells', 2, '#afd8ba')]
[('Binucleated', 1, '#8be720'), ('Multinucleated', 2, '#7e4cc1')]


### Store three-expert ground truth into database file

In [8]:
DB_MPGT = Database().create('databases/BiMulti_MultiPathologistGT_ConsensusOfThree.sqlite')
DB_MPGT.insertAnnotator('Expert consensus')
DB_MPGT.insertClass('Binucleated')
DB_MPGT.insertClass('Multinucleated')


for k in slideAnnos_bi.keys():
    sluid = DB_MPGT.insertNewSlide(k,'')
    for (x,y) in slideAnnos_bi[k].GTannotations().coordinates.values():
        DB_MPGT.insertNewSpotAnnotation(x,y,sluid,1,1)

    for (x,y) in slideAnnos_multi[k].GTannotations().coordinates.values():
        DB_MPGT.insertNewSpotAnnotation(x,y,sluid,2,1)

DB_MPGT.db.close()

### Bi-Nucleated

In [9]:
F1s = []
for author in files:
    TP,FN,FP = 0,0,0
    
    for slides in slideAnnos_bi.keys():
        TP += slideAnnos_bi[slides].TP(author)
        FP += slideAnnos_bi[slides].FP(author)
        FN += slideAnnos_bi[slides].FN(author)
    
    F1 = 2*TP/(2*TP+FP+FN+0.00001)
    F1s.append(F1)

    print(f'{author}: TP:{TP}, FP:{FP}, FN:{FN}, F1:{F1}')
    
print('Total positives binucleated:',TP+FN)
print('Median F1: ',np.median(F1s))


databases/BiMulti_VP1.sqlite: TP:79, FP:119, FN:31, F1:0.5129869963315911
databases/BiMulti_VP2.sqlite: TP:54, FP:42, FN:56, F1:0.5242718192101059
databases/BiMulti_VP3.sqlite: TP:71, FP:181, FN:39, F1:0.392265182534111
databases/BiMulti_VP4.sqlite: TP:61, FP:117, FN:49, F1:0.42361109640239253
databases/BiMulti_VP5.sqlite: TP:101, FP:563, FN:9, F1:0.26098190877284355
databases/BiMulti_VP6.sqlite: TP:51, FP:56, FN:59, F1:0.4700460612881999
Total positives binucleated: 110
Median F1:  0.44682857884529625


### Multi-nucleated

In [10]:
F1s = []
for author in files:
    TP,FN,FP = 0,0,0
    
    for slides in slideAnnos_multi.keys():
        TP += slideAnnos_multi[slides].TP(author)
        FP += slideAnnos_multi[slides].FP(author)
        FN += slideAnnos_multi[slides].FN(author)
    
    F1 = 2*TP/(2*TP+FP+FN+0.00001)
    F1s.append(F1)

    print(f'{author}: TP:{TP}, FP:{FP}, FN:{FN}, F1:{F1}')
    
print('Total positives multinucleated:',TP+FN)
print('Median F1: ',np.median(F1s))


databases/BiMulti_VP1.sqlite: TP:18, FP:15, FN:8, F1:0.6101693881068834
databases/BiMulti_VP2.sqlite: TP:16, FP:21, FN:10, F1:0.5079364273116782
databases/BiMulti_VP3.sqlite: TP:19, FP:62, FN:7, F1:0.3551401537252193
databases/BiMulti_VP4.sqlite: TP:19, FP:17, FN:7, F1:0.6129031269511085
databases/BiMulti_VP5.sqlite: TP:24, FP:53, FN:2, F1:0.4660193722311289
databases/BiMulti_VP6.sqlite: TP:15, FP:39, FN:11, F1:0.37499995312500584
Total positives multinucleated: 26
Median F1:  0.48697789977140354


# Now evaluate on Dataset GT

In [11]:
DB = Database().open(files_GT_DB[0])
for file in files:
    
    for dbid,fname in DB.listOfSlides():
        DB.loadIntoMemory(dbid)
        if fname not in slideAnnos_bi: 
            continue # not part of test set
        for anno in DB.annotations:
            if not DB.annotations[anno].deleted and DB.annotations[anno].agreedClass==5:
                slideAnnos_bi[fname].add_GT([DB.annotations[anno].x1,DB.annotations[anno].y1], 'GT')
            if not DB.annotations[anno].deleted and DB.annotations[anno].agreedClass==6:
                slideAnnos_multi[fname].add_GT([DB.annotations[anno].x1,DB.annotations[anno].y1], 'GT')
    


### Bi-Nucleated

In [12]:
F1s = []
for author in files:
    TP,FN,FP = 0,0,0
    
    for slides in slideAnnos_bi.keys():
        TP += slideAnnos_bi[slides].TP(author)
        FP += slideAnnos_bi[slides].FP(author)
        FN += slideAnnos_bi[slides].FN(author)
    
    F1 = 2*TP/(2*TP+FP+FN+0.00001)
    F1s.append(F1)

    print(f'{author}: TP:{TP}, FP:{FP}, FN:{FN}, Sens:{TP/(TP+FN):.3f}, Prec:{TP/(TP+FP):.3f} F1:{F1:.3f}')
    
    
print('Total positives binucleated:',TP+FN)
print('Median F1: ',np.median(F1s))


databases/BiMulti_VP1.sqlite: TP:91, FP:107, FN:57, Sens:0.615, Prec:0.460 F1:0.526
databases/BiMulti_VP2.sqlite: TP:40, FP:56, FN:108, Sens:0.270, Prec:0.417 F1:0.328
databases/BiMulti_VP3.sqlite: TP:56, FP:196, FN:92, Sens:0.378, Prec:0.222 F1:0.280
databases/BiMulti_VP4.sqlite: TP:44, FP:134, FN:104, Sens:0.297, Prec:0.247 F1:0.270
databases/BiMulti_VP5.sqlite: TP:122, FP:542, FN:26, Sens:0.824, Prec:0.184 F1:0.300
databases/BiMulti_VP6.sqlite: TP:42, FP:65, FN:106, Sens:0.284, Prec:0.393 F1:0.329
Total positives binucleated: 148
Median F1:  0.3141807230792797


### Multi-Nucleated

In [13]:
F1s = []
for author in files:
    TP,FN,FP = 0,0,0
    
    for slides in slideAnnos_multi.keys():
        TP += slideAnnos_multi[slides].TP(author)
        FP += slideAnnos_multi[slides].FP(author)
        FN += slideAnnos_multi[slides].FN(author)
    
    F1 = 2*TP/(2*TP+FP+FN+0.00001)
    F1s.append(F1)

    print(f'{author}: TP:{TP}, FP:{FP}, FN:{FN}, Sens:{TP/(TP+FN):.3f}, Prec:{TP/(TP+FP):.3f} F1:{F1:.3f}')
    
print('Total positives multinucleated:',TP+FN)
print('Median F1: ',np.median(F1s))


databases/BiMulti_VP1.sqlite: TP:23, FP:10, FN:18, Sens:0.561, Prec:0.697 F1:0.622
databases/BiMulti_VP2.sqlite: TP:20, FP:17, FN:21, Sens:0.488, Prec:0.541 F1:0.513
databases/BiMulti_VP3.sqlite: TP:22, FP:59, FN:19, Sens:0.537, Prec:0.272 F1:0.361
databases/BiMulti_VP4.sqlite: TP:21, FP:15, FN:20, Sens:0.512, Prec:0.583 F1:0.545
databases/BiMulti_VP5.sqlite: TP:26, FP:51, FN:15, Sens:0.634, Prec:0.338 F1:0.441
databases/BiMulti_VP6.sqlite: TP:15, FP:39, FN:26, Sens:0.366, Prec:0.278 F1:0.316
Total positives multinucleated: 41
Median F1:  0.4767491879152047
