Speedup nuclei detection on small ROIs (#467)
This commit significantly speeds up the Nuclei Detection and Nuclei Feature Extraction CLIs on small ROIs and adds some profiling code.

When I profiled the old code on a 1000 x 1000 ROI in a slide, I got the following stats:

```
histomicsk import time = 0.991516113281 seconds
large_image import time = 1.90734863281e-06 seconds
Total packages import time = 1.27036499977 seconds
Dask setup time = 0.432317018509 seconds
low-res foreground mask computation time = 41.9267101288 seconds
time to find tiles with significant foreground = 0.321593999863 seconds 
nuclei detection time = 4.45551609993 seconds
Total analysis time = 47.8499279022 seconds
```
The `low-res foreground mask computation time` step, which accounts for most of the time, reads the slide at a low resolution (approximately 2048 x 2048) and computes a binary mask representing the foreground of the slide. This mask is then used to throw away tiles that fall in the large amount of white space at the borders, which speeds up the analysis of the whole slide.

However, this step is overkill when analyzing small ROIs. Hence, I modified the code to skip it, and the code is now significantly faster on small ROIs (< 10 sec on a 1k x 1k ROI).
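
For reference, a minimal sketch of the new control flow, condensed from the `ComputeNucleiFeatures.py` diff below (`is_wsi`, `process_whole_image`, `ts`, `args`, `it_kwargs`, and `num_tiles` are assumed to be set up as in that CLI, with `process_whole_image` true only when no ROI is given):

```python
import numpy as np

if is_wsi and process_whole_image:
    # whole slide: compute the low-res foreground mask and per-tile
    # foreground fractions as before
    im_fgnd_mask_lres, fgnd_seg_scale = \
        cli_utils.segment_wsi_foreground_at_low_res(ts)

    tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
        args.inputImageFile, im_fgnd_mask_lres, fgnd_seg_scale, **it_kwargs)
else:
    # small ROI: skip the expensive mask and treat every tile as foreground
    tile_fgnd_frac_list = np.full(num_tiles, 1.0)
```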

Additionally, when analyzing the entire slide, I added code to compute the Reinhard color normalization statistics from the slide itself.
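
A sketch of that step and of how the stats feed into the per-tile normalization, taken from the diff below (`htk_cnorm` is `histomicstk.preprocessing.color_normalization`; 0.01 is the fraction of pixels sampled):

```python
src_mu_lab, src_sigma_lab = None, None

if is_wsi and process_whole_image:
    # estimate the slide's mean and stddev in LAB color space from a
    # random ~1% sample of pixels at the analysis magnification
    src_mu_lab, src_sigma_lab = htk_cnorm.reinhard_stats(
        args.inputImageFile, 0.01, magnification=args.analysis_mag)

# each tile is then normalized against the reference using these stats;
# for ROI runs they stay None and reinhard is assumed to fall back to
# computing the source stats from the tile itself
im_nmzd = htk_cnorm.reinhard(im_tile,
                             args.reference_mu_lab,
                             args.reference_std_lab,
                             src_mu=src_mu_lab,
                             src_sigma=src_sigma_lab)
```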

Commit history
--------------------
* Add code to time different steps

* Fix some issues with profiling code

* Skip tile fgnd computation for ROIs and add profiling code to nuclei detection and feature extraction CLIs

* Fix PEP8 error

* Increase image size back to 2048 x 2048 for finding the fgnd mask at low-res

* Add a guard for bad regions.

Remove regions with non-finite weighted centroids (which happen when the
moment is zero).

Also, when using small regions, use a numpy array for the foreground
fraction list.

* Add code to compute reinhard color normalization stats

When processing the entire slide, use `histomicstk.preprocessing.color_normalization.reinhard_stats` to compute the mean and stddev of the slide in LAB color space from a random sample of foreground pixels.

* Use better defaults for nuclei detection min and max radius

* Fix style error

* Fix a bug in the segment_wsi_foreground_at_low_res function

* Add another data file so we can test with a small WSI.

* Add a test for NucleiDetection CLI on both ROI and whole-slide

* Fix style error

* Fix bug in cli_utils.segment_wsi_foreground_at_low_res

* Test with fix in large_image branch of PR #262

* Switch large_image back to master branch after merging the changes in PR #262 of large_image

Co-authored by @cdeepakroy and @manthey
cdeepakroy committed Mar 1, 2018
1 parent f763cb0 commit da21849
Showing 9 changed files with 163 additions and 48 deletions.
2 changes: 2 additions & 0 deletions histomicstk/segmentation/nuclear/max_clustering.py
@@ -68,6 +68,8 @@ def max_clustering(im_response, im_fgnd_mask, r=10):
# compute object properties
obj_props = skimage.measure.regionprops(im_label, im_response_nmzd)

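# discard regions whose weighted centroid is non-finite, which happens
# when the region's moment is zero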
obj_props = [prop for prop in obj_props if np.isfinite(prop.weighted_centroid).all()]

num_labels = len(obj_props)

# extract object seeds
2 changes: 2 additions & 0 deletions plugin.cmake
@@ -142,6 +142,8 @@ add_histomicstk_python_test(cli_results
ENVIRONMENT
"CLI_LIST_ENTRYPOINT=${PROJECT_SOURCE_DIR}/plugins/slicer_cli_web/server/cli_list_entrypoint.py"
"CLI_CWD=${PROJECT_SOURCE_DIR}/plugins/HistomicsTK/server"
EXTERNAL_DATA
"plugins/HistomicsTK/TCGA-02-0010-01Z-00-DX4.07de2e55-a8fe-40ee-9e98-bcb78050b9f7-crop.tif"
)

# front-end tests
15 changes: 14 additions & 1 deletion plugin_tests/cli_results_test.py
@@ -172,9 +172,11 @@ def testListCLI(self):
def testNucleiDetectionDefaults(self):
cli_args = (
'NucleiDetection',
os.path.join(TEST_DATA_DIR, 'Easy1.png'),
os.path.join(TEST_DATA_DIR, 'TCGA-02-0010-01Z-00-DX4.07de2e55-a8fe-40ee-9e98-bcb78050b9f7-crop.tif'), # noqa
'tmp_1.anot',
)

# test without ROI
cli_kwargs = {
'analysis_roi': '-1.0, -1.0, -1.0, -1.0',
}
@@ -185,6 +187,17 @@ def testNucleiDetectionDefaults(self):
},
})

# test with ROI
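# the ROI is assumed to be given as 'left, top, width, height' in base
# pixel coordinates; the -1 values used above mean "process the whole image"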
cli_kwargs = {
'analysis_roi': '0.0, 200.0, 512.0, 512.0'
}
self._runTest(cli_args, cli_kwargs, outputs={
'tmp_1.anot': {
'contains': ['elements'],
# 'hash': '02b240586412c87ad5cbf349b7c22f80f1df31eef54ed8ee4ad1fd3624a89fa2',
},
})

def testColorDeconvolutionDefaults(self):
self._runTest(
cli_args=[
@@ -0,0 +1 @@
9f2edb99e1817c84453f519491f89bdbe37185bc61e89582d528ca687540906f867144e9467ff17b59685d1c34231c3c96868c821276b3e13e940e080ebefacb
82 changes: 64 additions & 18 deletions server/ComputeNucleiFeatures/ComputeNucleiFeatures.py
@@ -23,7 +23,8 @@
from cli_common import utils as cli_utils # noqa


def compute_tile_nuclei_features(slide_path, tile_position, args, **it_kwargs):
def compute_tile_nuclei_features(slide_path, tile_position, args, it_kwargs,
src_mu_lab=None, src_sigma_lab=None):
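    # it_kwargs is now passed as a plain dict (instead of **it_kwargs) so that
    # the new src_mu_lab / src_sigma_lab arguments (the slide-level Reinhard
    # stats, None when analyzing an ROI) can follow it in the dask.delayed call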

# get slide tile source
ts = large_image.getTileSource(slide_path)
@@ -40,7 +41,9 @@ def compute_tile_nuclei_features(slide_path, tile_position, args, **it_kwargs):
# perform color normalization
im_nmzd = htk_cnorm.reinhard(im_tile,
args.reference_mu_lab,
args.reference_std_lab)
args.reference_std_lab,
src_mu=src_mu_lab,
src_sigma=src_sigma_lab)

# perform color deconvolution
w = cli_utils.get_stain_matrix(args)
@@ -103,7 +106,7 @@ def main(args):

print('\n>> CLI Parameters ...\n')

print args
print(args)

check_args(args)

@@ -119,9 +122,14 @@
#
print('\n>> Creating Dask client ...\n')

start_time = time.time()

c = cli_utils.create_dask_client(args)

print c
print(c)

dask_setup_time = time.time() - start_time
print('Dask setup time = {} seconds'.format(dask_setup_time))

#
# Read Input Image
@@ -132,20 +140,27 @@

ts_metadata = ts.getMetadata()

print json.dumps(ts_metadata, indent=2)
print(json.dumps(ts_metadata, indent=2))

is_wsi = ts_metadata['magnification'] is not None

#
# Compute tissue/foreground mask at low-res for whole slide images
#
if is_wsi:
if is_wsi and process_whole_image:

print('\n>> Computing tissue/foreground mask at low-res ...\n')

start_time = time.time()

im_fgnd_mask_lres, fgnd_seg_scale = \
cli_utils.segment_wsi_foreground_at_low_res(ts)

fgnd_time = time.time() - start_time

print('low-res foreground mask computation time = {}'.format(
cli_utils.disp_time_hms(fgnd_time)))

#
# Compute foreground fraction of tiles in parallel using Dask
#
@@ -174,12 +189,18 @@

num_tiles = ts.getSingleTile(**it_kwargs)['iterator_range']['position']

print 'Number of tiles = %d' % num_tiles
print('Number of tiles = {}'.format(num_tiles))

tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
args.inputImageFile, im_fgnd_mask_lres, fgnd_seg_scale,
**it_kwargs
)
if process_whole_image:

tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
args.inputImageFile, im_fgnd_mask_lres, fgnd_seg_scale,
**it_kwargs
)

else:

tile_fgnd_frac_list = np.full(num_tiles, 1.0)

num_fgnd_tiles = np.count_nonzero(
tile_fgnd_frac_list >= args.min_fgnd_frac)
@@ -188,10 +209,31 @@

fgnd_frac_comp_time = time.time() - start_time

print 'Number of foreground tiles = %d (%.2f%%)' % (
num_fgnd_tiles, percent_fgnd_tiles)
print('Number of foreground tiles = {0:d} ({1:.2f}%)'.format(
num_fgnd_tiles, percent_fgnd_tiles))

print 'Time taken = %s' % cli_utils.disp_time_hms(fgnd_frac_comp_time)
print('Tile foreground fraction computation time = {}'.format(
cli_utils.disp_time_hms(fgnd_frac_comp_time)))

#
# Compute reinhard stats for color normalization
#
src_mu_lab = None
src_sigma_lab = None

if is_wsi and process_whole_image:

print('\n>> Computing reinhard color normalization stats ...\n')

start_time = time.time()

src_mu_lab, src_sigma_lab = htk_cnorm.reinhard_stats(
args.inputImageFile, 0.01, magnification=args.analysis_mag)

rstats_time = time.time() - start_time

print('Reinhard stats computation time = {}'.format(
cli_utils.disp_time_hms(rstats_time)))

#
# Detect and compute nuclei features in parallel using Dask
@@ -213,7 +255,9 @@
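# dask.delayed wraps the call into a lazy task per tile; the tasks are
# executed in parallel by the Dask client created earlier when the
# results are later computed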
cur_result = dask.delayed(compute_tile_nuclei_features)(
args.inputImageFile,
tile_position,
args, **it_kwargs)
args, it_kwargs,
src_mu_lab, src_sigma_lab
)

# append result to list
tile_result_list.append(cur_result)
@@ -229,8 +273,9 @@

nuclei_detection_time = time.time() - start_time

print 'Number of nuclei = ', len(nuclei_annot_list)
print "Time taken = %s" % cli_utils.disp_time_hms(nuclei_detection_time)
print('Number of nuclei = {}'.format(len(nuclei_annot_list)))
print('Nuclei detection time = {}'.format(
cli_utils.disp_time_hms(nuclei_detection_time)))

#
# Write annotation file
@@ -268,7 +313,8 @@ def main(args):

total_time_taken = time.time() - total_start_time

print 'Total analysis time = %s' % cli_utils.disp_time_hms(total_time_taken)
print('Total analysis time = {}'.format(
cli_utils.disp_time_hms(total_time_taken)))


if __name__ == "__main__":
4 changes: 2 additions & 2 deletions server/ComputeNucleiFeatures/ComputeNucleiFeatures.xml
@@ -183,14 +183,14 @@
<label>Minimum Radius</label>
<description>Minimum nuclear radius (used to set min sigma of the multiscale LoG filter)</description>
<longflag>min_radius</longflag>
<default>20</default>
<default>10</default>
</double>
<double>
<name>max_radius</name>
<label>Maximum Radius</label>
<description>Maximum nuclear radius (used to set max sigma of the multiscale LoG filter)</description>
<longflag>max_radius</longflag>
<default>30</default>
<default>20</default>
</double>
<double>
<name>local_max_search_radius</name>
