# Calculate Intra-Class-Correlation of both expert mitotic counts

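Cohen's kappa cannot be used to compare the two label sets, because the TUPAC16 set does not include explicit hard-negative examples. We therefore compare the per-slide mitotic counts of both label sets using the intraclass correlation (ICC).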

In [1]:
from sklearn.neighbors import KDTree
import numpy as np
from SlideRunner.dataAccess.database import Database
import pingouin as pg
import pandas as pd


    


  import pandas.util.testing as tm


Define matching of mitotic figure annotations

In [2]:
def match(dataset1 : np.ndarray, dataset2 : np.ndarray, radius : float = 25):
    """Match the point annotations of two data sets by spatial proximity.

    All points of both data sets are put into one KD-tree, and the
    neighbourhood (all points within ``radius``) of every point is queried.
    Whenever a neighbourhood contains points from both data sets, every
    member of that neighbourhood is flagged as matched. Two flagged
    annotations can therefore be up to ``2 * radius`` apart.

    Parameters
    ----------
    dataset1, dataset2 : np.ndarray
        One row per annotation with the columns ``(x, y, uid)``. A data set
        without annotations is passed as an array with ``shape[0] == 0``.
    radius : float, optional
        Neighbourhood radius, in the same unit as the coordinates.
        Defaults to 25, the value that used to be hard-coded.

    Returns
    -------
    tuple
        ``(F1, matching, nonlyDS1, nonlyDS2, details)`` where

        * ``F1`` is ``2*matching / (2*matching + nonlyDS1 + nonlyDS2)``,
        * ``matching`` is the number of flagged annotations of ``dataset2``,
        * ``nonlyDS1`` / ``nonlyDS2`` are the numbers of unflagged
          annotations of ``dataset1`` / ``dataset2``,
        * ``details`` is ``(mapping_DS1_to_DS2, mapping_DS2_to_DS1, onlyDS1,
          onlyDS2)``: two uid-to-uid dicts and the uids of the unflagged
          annotations of each data set.

        If both data sets are empty, ``(0, 0, 0, 0, ({}, {}, [], []))`` is
        returned.

    Raises
    ------
    Exception
        Whatever ``KDTree`` raises while being built is re-raised unchanged
        after the shape of the point matrix has been printed.
    AssertionError
        If the counters disagree with the collected mappings (e.g. when uids
        are not unique within ``dataset2``).
    """
    empty = np.array([])

    # An empty data set cannot be sliced column-wise, so substitute empty vectors.
    if dataset1.shape[0] > 0:
        center1_x, center1_y, uids1 = dataset1[:, 0], dataset1[:, 1], dataset1[:, 2]
    else:
        center1_x, center1_y, uids1 = empty, empty, empty

    if dataset2.shape[0] > 0:
        center2_x, center2_y, uids2 = dataset2[:, 0], dataset2[:, 1], dataset2[:, 2]
    else:
        center2_x, center2_y, uids2 = empty, empty, empty

    mapping_DS1_to_DS2 = {}
    mapping_DS2_to_DS1 = {}

    # Points of dataset1 come first in the stacked arrays, dataset2 follows.
    n_ds1 = center1_x.shape[0]
    isDS1 = np.zeros(n_ds1 + center2_x.shape[0], dtype=bool)
    isDS1[:n_ds1] = True

    center_x = np.hstack((center1_x, center2_x))
    center_y = np.hstack((center1_y, center2_y))
    uids = np.hstack((uids1, uids2))

    # (N, 2) matrix of all point coordinates
    X = np.column_stack((center_x, center_y))

    if X.shape[0] == 0:
        return 0,0,0,0,({},{},[],[])

    try:
        tree = KDTree(X)
    except Exception:
        # Report the offending input, then let the original error propagate.
        print('Shapes of X: ',X.shape)
        raise

    neighbourhoods = tree.query_radius(X, r=radius)

    # 0 = not flagged, 1 = flagged; keys are positions in the stacked arrays
    # (ascending, so the dict order lines up with uids[mask] below).
    annotationMatches = {idx: 0 for idx in np.where(~isDS1)[0]}
    DetectionMatchesAnnotation = {idx: 0 for idx in np.where(isDS1)[0]}

    for members in neighbourhoods:
        if len(members) == 0:
            continue
        member_is_ds1 = isDS1[members]
        if not (member_is_ds1.any() and (~member_is_ds1).any()):
            # neighbourhood holds points of one data set only
            continue

        # The first dataset1 member represents the neighbourhood in the mappings.
        anchor = members[member_is_ds1][0]
        for idx, from_ds1 in zip(members, member_is_ds1):
            if from_ds1:
                DetectionMatchesAnnotation[idx] = 1
            else:
                annotationMatches[idx] = 1
                mapping_DS2_to_DS1[uids[idx]] = uids[anchor]
                # Overwritten when several dataset2 points share the anchor.
                mapping_DS1_to_DS2[uids[anchor]] = uids[idx]

    ds2_unmatched = np.array([flag == 0 for flag in annotationMatches.values()], dtype=bool)
    ds1_unmatched = np.array([flag == 0 for flag in DetectionMatchesAnnotation.values()], dtype=bool)

    matching = int(np.sum(~ds2_unmatched))
    nonlyDS2 = int(np.sum(ds2_unmatched))
    nonlyDS1 = int(np.sum(ds1_unmatched))

    onlyDS1 = uids[isDS1][ds1_unmatched]
    onlyDS2 = uids[~isDS1][ds2_unmatched]

    # At least one point exists here, so the denominator is never zero.
    F1 = 2*matching/(2*matching + nonlyDS1 + nonlyDS2)

    assert(matching==len(mapping_DS2_to_DS1.keys()))
    assert(nonlyDS1 == len(onlyDS1))
    assert(nonlyDS2 == len(onlyDS2))
    return F1, matching, nonlyDS1, nonlyDS2, (mapping_DS1_to_DS2, mapping_DS2_to_DS1,onlyDS1,onlyDS2)


In [3]:
# Compare the per-slide mitotic counts of the two label sets and compute their
# intraclass correlation. Slides are paired by file name.
DB_TUPAC = Database().open('TUPAC_stitched.sqlite')
DB = Database().open('TUPAC_alternativeLabels_augmented.sqlite')


def _annotation_points(db, agreed_class=None):
    """Return an array of [x1, y1, uid] rows for the annotations loaded in db.

    If agreed_class is given, only annotations whose agreedClass equals it are kept.
    """
    rows = []
    for key in db.annotations.keys():
        anno = db.annotations[key]
        if agreed_class is None or anno.agreedClass == agreed_class:
            rows.append([anno.x1, anno.y1, anno.uid])
    return np.array(rows)


rater1_cnt = []
rater2_cnt = []
for fname, tupacSlideID in DB_TUPAC.execute('SELECT filename,uid from Slides order by filename').fetchall():
    newDBSlide = DB.findSlideWithFilename(fname,'')
    DB.loadIntoMemory(newDBSlide)
    DB_TUPAC.loadIntoMemory(tupacSlideID)

    annos_tupac = _annotation_points(DB_TUPAC)
    annos_tupac_al = _annotation_points(DB, agreed_class=1)
    # class 2 is presumably the non-mitotic label ("NM" in the printout) -- TODO confirm
    annos_tupac_al_nonmit = _annotation_points(DB, agreed_class=2)

    # dataset1 = alternative labels, dataset2 = original TUPAC labels
    mtch=match(annos_tupac_al,annos_tupac)
    mtch2=match(annos_tupac_al_nonmit,annos_tupac)
    print('%20s' % fname,'F1: %4f, matching: %4d, only TUPAC_AL: %4d, only TUPAC: %4d (%4d NM)' % (mtch[0:4]+mtch2[1:2]))

    # rater 1 = TUPAC: matched + TUPAC-only annotations
    rater1_cnt.append([mtch[1]+mtch[3]])
    # rater 2 = alternative labels: matched + TUPAC_AL-only annotations
    rater2_cnt.append([mtch[1]+mtch[2]])


# Derive the number of cases from the data instead of hard-coding it.
n_cases = len(rater1_cnt)

# Long format: one row per (case, rater) with the mitotic count (MC).
data = np.array([np.array(rater1_cnt+rater2_cnt).flatten(),
                 np.array([1,]*n_cases+[2,]*n_cases),
                 1+np.array(list(range(n_cases))*2)])

df = pd.DataFrame(data.T,
             columns=['MC','rater','case'])

icc = pg.intraclass_corr(data=df, targets='case', raters='rater',
                         ratings='MC').round(3)
icc.set_index("Type")

    01_stitched1.tif F1: 0.690476, matching:   58, only TUPAC_AL:   37, only TUPAC:   15 (  13 NM)
    02_stitched1.tif F1: 0.761905, matching:   32, only TUPAC_AL:   15, only TUPAC:    5 (   4 NM)
    03_stitched1.tif F1: 0.551724, matching:    8, only TUPAC_AL:    3, only TUPAC:   10 (   6 NM)
    04_stitched1.tif F1: 0.674033, matching:  183, only TUPAC_AL:  136, only TUPAC:   41 (  38 NM)
    05_stitched1.tif F1: 0.800000, matching:    4, only TUPAC_AL:    0, only TUPAC:    2 (   1 NM)
    06_stitched1.tif F1: 0.816901, matching:   87, only TUPAC_AL:   30, only TUPAC:    9 (   8 NM)
    07_stitched1.tif F1: 0.625698, matching:   56, only TUPAC_AL:   55, only TUPAC:   12 (  10 NM)
    08_stitched1.tif F1: 1.000000, matching:    3, only TUPAC_AL:    0, only TUPAC:    0 (   0 NM)
    09_stitched1.tif F1: 0.666667, matching:    1, only TUPAC_AL:    0, only TUPAC:    1 (   1 NM)
    10_stitched1.tif F1: 0.000000, matching:    0, only TUPAC_AL:    0, only TUPAC:    0 (   0 NM)
    11_sti

Unnamed: 0_level_0,Description,ICC,F,df1,df2,pval,CI95%
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ICC1,Single raters absolute,0.929,27.183,72,73,0.0,"[0.89, 0.95]"
ICC2,Single random raters,0.929,30.205,72,72,0.0,"[0.88, 0.96]"
ICC3,Single fixed raters,0.936,30.205,72,72,0.0,"[0.9, 0.96]"
ICC1k,Average raters absolute,0.963,27.183,72,73,0.0,"[0.94, 0.98]"
ICC2k,Average random raters,0.963,30.205,72,72,0.0,"[0.94, 0.98]"
ICC3k,Average fixed raters,0.967,30.205,72,72,0.0,"[0.95, 0.98]"
