#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 18 03:29:24 2019.
@author: mtageld
"""
import numpy as np
from PIL import Image
from pandas import DataFrame
from histomicstk.annotations_and_masks.annotation_and_mask_utils import (
get_image_from_htk_response)
from histomicstk.preprocessing.color_deconvolution.color_deconvolution import (
color_deconvolution_routine)
from histomicstk.preprocessing.color_deconvolution.\
rgb_separate_stains_macenko_pca import rgb_separate_stains_macenko_pca
from histomicstk.preprocessing.color_deconvolution.find_stain_index import (
find_stain_index)
from histomicstk.annotations_and_masks.masks_to_annotations_handler import (
get_contours_from_mask, get_annotation_documents_from_contours)
import cv2
from skimage.filters import threshold_otsu, gaussian
from scipy import ndimage
Image.MAX_IMAGE_PIXELS = None
# %%===========================================================================
def get_slide_thumbnail(gc, slide_id):
    """Fetch the RGB thumbnail of a slide through a girder client.

    Parameters
    -------------
    gc : object
        girder client to use
    slide_id : str
        girder ID of slide

    Returns
    ---------
    np array
        RGB slide thumbnail at lowest level

    """
    response = gc.get('/item/%s/tiles/thumbnail' % slide_id, jsonResp=False)
    return get_image_from_htk_response(response)
# %%===========================================================================
def _deconv_color(im, **kwargs):
    """Wrap color_deconvolution_routine for backward compatibility.

    Returns the stains image plus a constant 0, matching the two-value
    signature older callers expect.
    """
    stains = color_deconvolution_routine(im, **kwargs)[0]
    return stains, 0
# %%===========================================================================
def get_tissue_mask(
        thumbnail_im,
        deconvolve_first=False, stain_unmixing_routine_kwargs=None,
        n_thresholding_steps=1, sigma=0., min_size=500):
    """Get binary tissue mask from slide thumbnail.

    Parameters
    -----------
    thumbnail_im : np array
        (m, n, 3) nd array of thumbnail RGB image
        or (m, n) nd array of thumbnail grayscale image
    deconvolve_first : bool
        use hematoxylin channel to find cellular areas?
        This will make things ever-so-slightly slower but is better in
        getting rid of sharpie marker (if it's green, for example).
        Sometimes things work better without it, though.
    stain_unmixing_routine_kwargs : dict
        kwargs passed through to color_deconvolution_routine(); only used
        when deconvolve_first is True. The 'stains' key is always set to
        ['hematoxylin', 'eosin'] internally (the caller's dict is not
        mutated).
    n_thresholding_steps : int
        number of gaussian smoothing steps
    sigma : float
        sigma of gaussian filter
    min_size : int
        minimum size (in pixels) of contiguous tissue regions to keep

    Returns
    --------
    np int32 array
        each unique value represents a unique tissue region
    np bool array
        largest contiguous tissue region.

    """
    # BUGFIX: the default used to be a mutable dict literal that the body
    # also mutated (adding the 'stains' key), so state leaked across calls
    # and caller-supplied dicts were silently modified.
    if stain_unmixing_routine_kwargs is None:
        stain_unmixing_routine_kwargs = {}

    if deconvolve_first and (len(thumbnail_im.shape) == 3):
        # deconvolve to get the hematoxylin channel (cellular areas).
        # The hematoxylin channel shows MINIMA at cellular areas so we
        # invert it. Copy the kwargs so the caller's dict is untouched.
        stain_unmixing_routine_kwargs = dict(
            stain_unmixing_routine_kwargs,
            stains=['hematoxylin', 'eosin'])
        Stains, _, _ = color_deconvolution_routine(
            thumbnail_im, **stain_unmixing_routine_kwargs)
        thumbnail = 255 - Stains[..., 0]

    elif len(thumbnail_im.shape) == 3:
        # RGB thumbnail -> inverted grayscale (tissue is dark on bright)
        thumbnail = 255 - cv2.cvtColor(thumbnail_im, cv2.COLOR_BGR2GRAY)

    else:
        thumbnail = thumbnail_im

    for _ in range(n_thresholding_steps):

        # gaussian smoothing of grayscale thumbnail
        if sigma > 0.0:
            thumbnail = gaussian(
                thumbnail, sigma=sigma,
                output=None, mode='nearest', preserve_range=True)

        # get threshold to keep analysis region
        try:
            thresh = threshold_otsu(thumbnail[thumbnail > 0])
        except ValueError:  # all values are zero
            thresh = 0

        # zero out pixels below the otsu threshold
        thumbnail[thumbnail < thresh] = 0

    # convert to binary
    mask = 0 + (thumbnail > 0)

    # find connected components
    labeled, _ = ndimage.label(mask)

    # only keep regions of at least min_size pixels
    # (np.isin replaces the deprecated np.in1d and preserves shape)
    unique, counts = np.unique(labeled[labeled > 0], return_counts=True)
    labeled[np.isin(labeled, unique[counts < min_size])] = 0

    # largest tissue region
    mask = labeled == unique[np.argmax(counts)]

    return labeled, mask
# %%===========================================================================
def get_tissue_boundary_annotation_documents(
        gc, slide_id, labeled,
        color='rgb(0,0,0)', group='tissue', annprops=None):
    """Get annotation documents of tissue boundaries to visualize on DSA.

    Parameters
    -----------
    gc : object
        girder client to use
    slide_id : str
        girder ID of slide
    labeled : np array
        mask of tissue regions using slide thumbnail. This could either be
        a binary mask or a mask where each unique value corresponds to one
        tissue region. It will be binarized anyways. This can be obtained
        using get_tissue_mask().
    color : str
        color to assign to boundaries. format like rgb(0,0,0)
    group : str
        label for annotations
    annprops : dict
        properties of annotation elements. Contains the following keys
        F, X_OFFSET, Y_OFFSET, opacity, lineWidth. Refer to
        get_single_annotation_document_from_contours() for details.

    Returns
    --------
    list of dicts
        each dict is an annotation document that you can post to DSA

    """
    # Get annotations properties. If the caller did not supply annprops,
    # derive the scale factor F from the slide's base magnification so the
    # thumbnail-space contours map back onto the full-resolution slide.
    if annprops is None:
        slide_info = gc.get('item/%s/tiles' % slide_id)
        annprops = {
            'F': slide_info['sizeX'] / labeled.shape[1],  # relative to base
            'X_OFFSET': 0,
            'Y_OFFSET': 0,
            'opacity': 0,
            'lineWidth': 4.0,
        }

    # Define GTCodes dataframe: a single 'tissue' class (GT_code 1) since
    # the mask is binarized below.
    GTCodes_df = DataFrame(columns=['group', 'GT_code', 'color'])
    GTCodes_df.loc['tissue', 'group'] = group
    GTCodes_df.loc['tissue', 'GT_code'] = 1
    GTCodes_df.loc['tissue', 'color'] = color

    # get annotation docs: binarize the labeled mask (0 + (labeled > 0)),
    # trace region contours, then convert them to DSA annotation documents.
    contours_tissue = get_contours_from_mask(
        MASK=0 + (labeled > 0), GTCodes_df=GTCodes_df,
        get_roi_contour=False, MIN_SIZE=0, MAX_SIZE=None, verbose=False,
        monitorPrefix="tissue: getting contours")
    annotation_docs = get_annotation_documents_from_contours(
        contours_tissue.copy(), docnamePrefix='test', annprops=annprops,
        verbose=False, monitorPrefix="tissue : annotation docs")

    return annotation_docs
# %%===========================================================================
def threshold_multichannel(
        im, thresholds, channels=['hue', 'saturation', 'intensity'],
        just_threshold=False, get_tissue_mask_kwargs=None):
    """Threshold a multi-channel image (eg. HSI image) to get tissue.

    This relies on the fact that oftentimes some slide elements (eg blood
    or whitespace) have a characteristic hue/saturation/intensity. This
    thresholds along each HSI channel, then optionally uses the
    get_tissue_mask() method (gaussian smoothing, otsu thresholding,
    connected components) to get each contiguous tissue piece.

    Parameters
    -----------
    im : np array
        (m, n, 3) array of Hue, Saturation, Intensity (in this order)
    thresholds : dict
        Each entry is a dict containing the keys min and max
    channels : list
        names of channels, in order (eg. hue, saturation, intensity)
    just_threshold : bool
        if False, get_tissue_mask() is used to smooth result and get regions.
    get_tissue_mask_kwargs : dict
        key-value pairs of parameters to pass to get_tissue_mask()

    Returns
    --------
    np int32 array
        if not just_threshold, unique values represent unique tissue regions
    np bool array
        if not just_threshold, largest contiguous tissue region.

    """
    if get_tissue_mask_kwargs is None:
        get_tissue_mask_kwargs = {
            'n_thresholding_steps': 1,
            'sigma': 5.0,
            'min_size': 10,
        }

    # keep only pixels that fall inside [min, max) along EVERY channel
    mask = np.ones(im.shape[:2])
    for ax, ch in enumerate(channels):
        channel = im[..., ax].copy()
        mask[channel < thresholds[ch]['min']] = 0
        mask[channel >= thresholds[ch]['max']] = 0

    # smoothing, otsu thresholding then connected components.
    # BUGFIX: the guard used to be `< 1`, which is never true for a
    # nonempty array (np.unique returns at least one value), so a uniform
    # (eg all-zero) mask fell through to get_tissue_mask() and crashed on
    # argmax of an empty array. `< 2` correctly skips it for uniform masks.
    if just_threshold or (np.unique(mask).shape[0] < 2):
        labeled = mask
    else:
        get_tissue_mask_kwargs['deconvolve_first'] = False
        labeled, mask = get_tissue_mask(mask, **get_tissue_mask_kwargs)

    return labeled, mask
# %%===========================================================================
def _get_largest_regions(labeled, top_n=10):
labeled_im = labeled.copy()
unique, counts = np.unique(labeled_im[labeled_im > 0], return_counts=True)
keep = unique[np.argsort(counts)[-top_n:]]
mask = np.zeros(labeled_im.shape)
keep_pixels = np.in1d(labeled_im, keep)
keep_pixels = keep_pixels.reshape(labeled_im.shape)
mask[keep_pixels] = 1
labeled_im[mask == 0] = 0
return labeled_im
# %%===========================================================================