In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline


import pickle
import numpy as np

from SlideRunner.dataAccess.database import Database
from lib.calculate_F1 import *
# Result accumulators shared by all cells below, keyed by the experiment
# identifier (`ident`) that each cell sets.
optimal_threshold = {}  # ident -> first value returned by optimize_threshold
F1_values = {}          # ident -> first value returned by calculate_F1

# Object detection (single shot) based on RetinaNet

## First: Clustering- and Object-Detection-augmented Expert Labelling (CODAEL)

We first optimize the threshold on the train+validation set, and then run inference on the test set.

In [2]:
# Single-stage RetinaNet on the CODAEL variant.
ident = 'CODAEL'
databasefile = 'databases/MITOS_WSI_CMC_CODAEL_TR.sqlite'

# Step 1: pick the detection threshold from the train+validation result file.
# NOTE(review): minthres=0.3 is presumably the lower bound of the threshold
# sweep -- confirm in lib.calculate_F1.
resfile = 'results/trainval_RetinaNet-CMC-CODAEL-512sh-b1.pth-CODAEL-val-inference_results_boxes.p.bz2'
optimal_threshold[ident], F1scores, thrs = optimize_threshold(
    databasefile=databasefile,
    minthres=0.3,
    resfile=resfile,
)

# Step 2: score the test-set result file with the threshold chosen above.
resfile = 'results/test_RetinaNet-CMC-CODAEL-512sh-b1.pth-CODAEL-val-inference_results_boxes.p.bz2'
F1_values[ident], individ = calculate_F1(
    databasefile=databasefile,
    resfile=resfile,
    det_thres=optimal_threshold[ident],
)


Optimizing threshold for validation set of 14 files:  a8773be388e12df89edd.svs,460906c0b1fe17ea5354.svs,d0423ef9a648bb66a763.svs,50cf88e9a33df0c0c8f9.svs,da18e7b9846e9d38034c.svs,d7a8af121d7d4f3fbf01.svs,2191a7aa287ce1d5dbc0.svs,c4b95da36e32993289cb.svs,72c93e042d0171a61012.svs,4eee7b944ad5e46c60ce.svs,3d3d04eca056556b0b26.svs,084383c18b9060880e82.svs,d37ab62158945f22deed.svs,deb768e5efb9d1dcbc13.svs
Best threshold: F1= 0.7737527242740178 Threshold= 0.6600000000000004
Calculating F1 for test set of 7 files
Overall: 
TP: 3917 FP: 1217 FN:  1446 F1: 0.7463084690864057


## Second: Object Detection-Augmented and Expert Labeled (ODAEL) data set variant

In this variant, an object-detection pipeline helped to find all mitotic figures and similar cells.

In [3]:
# Single-stage RetinaNet on the ODAEL variant.
ident = 'ODAEL'
databasefile = 'databases/MITOS_WSI_CMC_ODAEL_TR.sqlite'

# Step 1: pick the detection threshold from the train+validation result file.
# NOTE(review): minthres=0.3 is presumably the lower bound of the threshold
# sweep -- confirm in lib.calculate_F1.
resfile = 'results/trainval_RetinaNet-CMC-ODAEL-512sh-b1.pth-ODAEL-val-inference_results_boxes.p.bz2'
optimal_threshold[ident], F1scores, thrs = optimize_threshold(
    databasefile=databasefile,
    minthres=0.3,
    resfile=resfile,
)

# Step 2: score the test-set result file with the threshold chosen above.
resfile = 'results/test_RetinaNet-CMC-ODAEL-512sh-b1.pth-ODAEL-val-inference_results_boxes.p.bz2'
F1_values[ident], individ = calculate_F1(
    databasefile=databasefile,
    resfile=resfile,
    det_thres=optimal_threshold[ident],
)



Optimizing threshold for validation set of 14 files:  a8773be388e12df89edd.svs,460906c0b1fe17ea5354.svs,d0423ef9a648bb66a763.svs,50cf88e9a33df0c0c8f9.svs,da18e7b9846e9d38034c.svs,d7a8af121d7d4f3fbf01.svs,2191a7aa287ce1d5dbc0.svs,c4b95da36e32993289cb.svs,72c93e042d0171a61012.svs,4eee7b944ad5e46c60ce.svs,3d3d04eca056556b0b26.svs,084383c18b9060880e82.svs,d37ab62158945f22deed.svs,deb768e5efb9d1dcbc13.svs
Best threshold: F1= 0.7106227106227107 Threshold= 0.6800000000000004
Calculating F1 for test set of 7 files
Overall: 
TP: 3655 FP: 1335 FN:  1784 F1: 0.7009300987630646


## Lastly: The initial manual labeling by two experts (MEL)

In [4]:
# Single-stage RetinaNet on the MEL variant.
ident = 'MEL'
databasefile = 'databases/MITOS_WSI_CMC_MEL.sqlite'

# Step 1: pick the detection threshold from the train+validation result file.
# NOTE(review): minthres=0.3 is presumably the lower bound of the threshold
# sweep -- confirm in lib.calculate_F1.
resfile = 'results/trainval_RetinaNet-CMC-MEL-512sh-b1.pth-MELshort-val-inference_results_boxes.p.bz2'
optimal_threshold[ident], F1scores, thrs = optimize_threshold(
    databasefile=databasefile,
    minthres=0.3,
    resfile=resfile,
)

# Step 2: score the test-set result file with the threshold chosen above.
resfile = 'results/test_RetinaNet-CMC-MEL-512sh-b1.pth-MELshort-val-inference_results_boxes.p.bz2'
F1_values[ident], individ = calculate_F1(
    databasefile=databasefile,
    resfile=resfile,
    det_thres=optimal_threshold[ident],
)



Optimizing threshold for validation set of 14 files:  a8773be388e12df89edd.svs,460906c0b1fe17ea5354.svs,d0423ef9a648bb66a763.svs,50cf88e9a33df0c0c8f9.svs,da18e7b9846e9d38034c.svs,d7a8af121d7d4f3fbf01.svs,2191a7aa287ce1d5dbc0.svs,c4b95da36e32993289cb.svs,72c93e042d0171a61012.svs,4eee7b944ad5e46c60ce.svs,3d3d04eca056556b0b26.svs,084383c18b9060880e82.svs,d37ab62158945f22deed.svs,deb768e5efb9d1dcbc13.svs
Best threshold: F1= 0.7147628590514362 Threshold= 0.6600000000000004
Calculating F1 for test set of 7 files
Overall: 
TP: 3480 FP: 1349 FN:  1707 F1: 0.694888178913738


# Two-stage approach
In this approach, we use the results of the previous RetinaNet as the first stage and a ResNet-18 classifier as the second stage. It is evaluated on all three data set variants.


### First, again, the CODAEL data set variant.

In [5]:
# Two-stage pipeline (result files prefixed "2ndstage_") on the CODAEL variant.
ident = 'CODAEL_2nd'
databasefile = 'databases/MITOS_WSI_CMC_CODAEL_TR.sqlite'

# Step 1: pick the detection threshold from the train+validation result file.
# NOTE(review): minthres=0.3 is presumably the lower bound of the threshold
# sweep -- confirm in lib.calculate_F1.
resfile = 'results/trainval_2ndstage_RetinaNet-CMC-CODAEL-512sh-b1.pth-CODAEL-val-inference_results_boxes.p.bz2'
optimal_threshold[ident], F1scores, thrs = optimize_threshold(
    databasefile=databasefile,
    minthres=0.3,
    resfile=resfile,
)

# Step 2: score the test-set result file with the threshold chosen above.
resfile = 'results/test_2ndstage_RetinaNet-CMC-CODAEL-512sh-b1.pth-CODAEL-val-inference_results_boxes.p.bz2'
F1_values[ident], individ = calculate_F1(
    databasefile=databasefile,
    resfile=resfile,
    det_thres=optimal_threshold[ident],
)



Optimizing threshold for validation set of 14 files:  a8773be388e12df89edd.svs,460906c0b1fe17ea5354.svs,d0423ef9a648bb66a763.svs,50cf88e9a33df0c0c8f9.svs,da18e7b9846e9d38034c.svs,d7a8af121d7d4f3fbf01.svs,2191a7aa287ce1d5dbc0.svs,c4b95da36e32993289cb.svs,72c93e042d0171a61012.svs,4eee7b944ad5e46c60ce.svs,3d3d04eca056556b0b26.svs,084383c18b9060880e82.svs,d37ab62158945f22deed.svs,deb768e5efb9d1dcbc13.svs
Best threshold: F1= 0.8384427089404359 Threshold= 0.5500000000000003
Calculating F1 for test set of 7 files
Overall: 
TP: 4075 FP: 873 FN:  1288 F1: 0.7904180001939676


### Then, the ODAEL variant (without any clustering help), evaluated with the dual-stage setup:

In [6]:
# Two-stage pipeline (result files prefixed "2ndstage_") on the ODAEL variant.
ident = 'ODAEL_2nd'
databasefile = 'databases/MITOS_WSI_CMC_ODAEL_TR.sqlite'

# Step 1: pick the detection threshold from the train+validation result file.
# NOTE(review): minthres=0.3 is presumably the lower bound of the threshold
# sweep -- confirm in lib.calculate_F1.
resfile = 'results/trainval_2ndstage_RetinaNet-CMC-ODAEL-512sh-b1.pth-ODAEL-val-inference_results_boxes.p.bz2'
optimal_threshold[ident], F1scores, thrs = optimize_threshold(
    databasefile=databasefile,
    minthres=0.3,
    resfile=resfile,
)

# Step 2: score the test-set result file with the threshold chosen above.
resfile = 'results/test_2ndstage_RetinaNet-CMC-ODAEL-512sh-b1.pth-ODAEL-val-inference_results_boxes.p.bz2'
F1_values[ident], individ = calculate_F1(
    databasefile=databasefile,
    resfile=resfile,
    det_thres=optimal_threshold[ident],
)



Optimizing threshold for validation set of 14 files:  a8773be388e12df89edd.svs,460906c0b1fe17ea5354.svs,d0423ef9a648bb66a763.svs,50cf88e9a33df0c0c8f9.svs,da18e7b9846e9d38034c.svs,d7a8af121d7d4f3fbf01.svs,2191a7aa287ce1d5dbc0.svs,c4b95da36e32993289cb.svs,72c93e042d0171a61012.svs,4eee7b944ad5e46c60ce.svs,3d3d04eca056556b0b26.svs,084383c18b9060880e82.svs,d37ab62158945f22deed.svs,deb768e5efb9d1dcbc13.svs
Best threshold: F1= 0.8214025040705182 Threshold= 0.5800000000000003
Calculating F1 for test set of 7 files
Overall: 
TP: 4200 FP: 1272 FN:  1239 F1: 0.769865273577124


### And finally, the manual expert-labeled set with two stages

In [7]:

# Two-stage pipeline (result files prefixed "2ndstage_") on the MEL variant.
ident = 'MEL_2nd'
databasefile = 'databases/MITOS_WSI_CMC_MEL.sqlite'

# Step 1: pick the detection threshold from the train+validation result file.
# NOTE(review): minthres=0.3 is presumably the lower bound of the threshold
# sweep -- confirm in lib.calculate_F1.
resfile = 'results/trainval_2ndstage_RetinaNet-CMC-MEL-512sh-b1.pth-MELshort-val-inference_results_boxes.p.bz2'
optimal_threshold[ident], F1scores, thrs = optimize_threshold(
    databasefile=databasefile,
    minthres=0.3,
    resfile=resfile,
)

# Step 2: score the test-set result file with the threshold chosen above.
resfile = 'results/test_2ndstage_RetinaNet-CMC-MEL-512sh-b1.pth-MELshort-val-inference_results_boxes.p.bz2'
F1_values[ident], individ = calculate_F1(
    databasefile=databasefile,
    resfile=resfile,
    det_thres=optimal_threshold[ident],
)



Optimizing threshold for validation set of 14 files:  a8773be388e12df89edd.svs,460906c0b1fe17ea5354.svs,d0423ef9a648bb66a763.svs,50cf88e9a33df0c0c8f9.svs,da18e7b9846e9d38034c.svs,d7a8af121d7d4f3fbf01.svs,2191a7aa287ce1d5dbc0.svs,c4b95da36e32993289cb.svs,72c93e042d0171a61012.svs,4eee7b944ad5e46c60ce.svs,3d3d04eca056556b0b26.svs,084383c18b9060880e82.svs,d37ab62158945f22deed.svs,deb768e5efb9d1dcbc13.svs
Best threshold: F1= 0.7232958429130678 Threshold= 0.8600000000000005
Calculating F1 for test set of 7 files
Overall: 
TP: 3567 FP: 1293 FN:  1620 F1: 0.7100627052851598


### Print out table 2 in the manuscript

In [8]:
# Emit the two LaTeX rows of Table 2 (columns in order: MEL, ODAEL, CODAEL),
# with each F1 value formatted to three decimals.
#
# Raw f-strings (rf'...') are required here: in a normal string literal the
# "\t" inside "$\times$" is the TAB escape, so the row printed as
# "512$<TAB>imes$512", and the trailing "\\" collapsed to a single backslash
# instead of the LaTeX row terminator "\\".
print(rf'Single stage (RetinaNet, 512$\times$512) & {F1_values["MEL"]:.3f} & {F1_values["ODAEL"]:.3f} & {F1_values["CODAEL"]:.3f} \\')
print(rf'Dual stage (RetinaNet + ResNet-18) & {F1_values["MEL_2nd"]:.3f} & {F1_values["ODAEL_2nd"]:.3f} & {F1_values["CODAEL_2nd"]:.3f} \\')

Single stage (RetinaNet, 512$	imes$512) & 0.695 & 0.701 & 0.746 \
Dual stage (RetinaNet + ResNet-18) & 0.710 & 0.770 & 0.790 \
