Skip to content

Commit

Permalink
Merge pull request #270 from Dana-Farber-AIOS/dev
Browse files Browse the repository at this point in the history
v2.0.2
  • Loading branch information
jacob-rosenthal committed Jan 6, 2022
2 parents a52f632 + e66a1d7 commit df49ac1
Show file tree
Hide file tree
Showing 10 changed files with 107 additions and 64 deletions.
1 change: 1 addition & 0 deletions .github/workflows/tests-conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,5 @@ jobs:
shell: bash -l {0}
run: |
cd docs
pip install -r readthedocs-requirements.txt
make html
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ COPY tests/ /opt/pathml/tests

# install pathml and deepcell
RUN pip3 install --upgrade pip \
&& pip3 install numpy==1.19.5 \
&& pip3 install numpy==1.19.5 spams \
&& pip3 install python-bioformats==4.0.0 deepcell /opt/pathml/ pytest

# run tests to verify container
Expand Down
14 changes: 7 additions & 7 deletions docs/readthedocs-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
sphinx>=3.4.3
nbsphinx>=0.8.1
nbsphinx-link>=1.3.0
sphinx-rtd-theme>=0.5.1
sphinx-autoapi
Ipython
sphinx-copybutton
sphinx==4.3.2
nbsphinx==0.8.8
nbsphinx-link==1.3.0
sphinx-rtd-theme==1.0.0
sphinx-autoapi==1.8.4
ipython==7.30.1
sphinx-copybutton==0.4.0
30 changes: 12 additions & 18 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,24 @@ dependencies:
- pip==21.2.2
- python==3.8
- numpy==1.19.5
- scipy==1.7.1
- scipy==1.7.3
- scikit-image==0.18.3
- matplotlib==3.1.3
- matplotlib==3.5.1
- python-spams==2.6.1
- openjdk==8.0.152
- pytorch==1.9.0
- pytorch==1.10.1
- h5py==3.1.0
- dask==2021.7.1
- pydicom==2.1.2
- dask==2021.12.0
- pydicom==2.2.2
- pytest==6.2.5
- pre-commit==2.13.0
- pre-commit==2.16.0
- coverage==5.5
- pip:
- python-bioformats==4.0.0
- python-javabridge==4.0.0
- deepcell==0.11.0
- opencv-contrib-python==4.5.3.56
- openslide-python==1.1.2
- javabridge==1.0.19
- python-bioformats==4.0.0
- scanpy==1.7.2
- anndata==0.7.6
- ipython==7.27.0
- sphinx==4.2.0
- nbsphinx==0.8.7
- nbsphinx-link==1.3.0
- sphinx-rtd-theme==1.0.0
- sphinx-autoapi==1.8.4
- sphinx-copybutton==0.4.0
- tqdm
- scanpy==1.8.2
- anndata==0.7.8
- tqdm==4.62.3
61 changes: 35 additions & 26 deletions pathml/core/slide_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@

from io import BytesIO
from typing import Tuple

import numpy as np
import openslide
import pathml.core
import pathml.core.tile
from javabridge.jutil import JavaException
from pathml.utils import pil_to_rgb
from PIL import Image
from pydicom.dataset import Dataset
from pydicom.encaps import get_frame_offsets
Expand All @@ -15,11 +20,6 @@
from pydicom.tag import SequenceDelimiterTag, TupleTag
from pydicom.uid import UID
from scipy.ndimage import zoom
from javabridge.jutil import JavaException

import pathml.core
import pathml.core.tile
from pathml.utils import pil_to_rgb

try:
import bioformats
Expand Down Expand Up @@ -310,7 +310,7 @@ def get_image_shape(self, level=None):
), f"input level {level} invalid for slide with {self.level_count} levels total"
return self.shape_list[level][:2]

def extract_region(self, location, size, level=0):
def extract_region(self, location, size, level=0, series_as_channels=False):
"""
Extract a region of the image. All bioformats images have 5 dimensions representing
(x, y, z, channel, time). Even if an image does not have multiple z-series or time-series,
Expand All @@ -323,14 +323,12 @@ def extract_region(self, location, size, level=0):
size (Tuple[int, int, ...]): (X,Y) size of each region. If an integer is passed, will convert to a
tuple of (H, W) and extract a square region. If a tuple with len < 5 is passed, missing
dimensions will be retrieved in full.
level (int): level from which to extract chunks. Level 0 is highest resolution.
level (int): level from which to extract chunks. Level 0 is highest resolution. Defaults to 0.
series_as_channels (bool): Whether to treat image series as channels. If ``True``, multi-level images
are not supported. Defaults to ``False``.
Returns:
np.ndarray: image at the specified region
Example:
Extract 2000x2000 x,y region from upper left corner of 7 channel, 2d fluorescent image.
data.slide.extract_region(location = (0,0), size = 2000)
np.ndarray: image at the specified region. 5-D array of (x, y, z, c, t)
"""
if level is None:
level = 0
Expand Down Expand Up @@ -359,6 +357,11 @@ def extract_region(self, location, size, level=0):
raise ValueError(
f"input size {size} invalid. Must be a tuple of integer coordinates of len<2"
)
if series_as_channels:
assert (
level == 0
), f"Multi-level images not supported with series_as_channels=True. Input 'level={level}' invalid. Use 'level=0'."

javabridge.start_vm(class_path=bioformats.JARS, max_heap_size="100G")
with bioformats.ImageReader(str(self.filename), perform_init=True) as reader:
# expand size
Expand All @@ -370,32 +373,35 @@ def extract_region(self, location, size, level=0):
arrayshape = tuple(arrayshape)
array = np.empty(arrayshape)

# read a very small region to check whether the image has channels incorrectly stored as series
sample = reader.read(
z=0,
t=0,
series=level,
rescale=False,
XYWH=(location[0], location[1], size[0], size[1]),
XYWH=(location[0], location[1], 2, 2),
)

if len(sample.shape) == 2:
# Some facilities output images in which the channels are incorrectly stored as series.
# In that case, read the image for each series separately, then stack them together as channels.
if series_as_channels:
for z in range(self.shape_list[level][2]):
for c in range(self.shape_list[level][3]):
for t in range(self.shape_list[level][4]):
slicearray = reader.read(
z=z,
t=t,
series=level,
series=c,
rescale=False,
XYWH=(location[0], location[1], size[0], size[1]),
)
slicearray = np.asarray(slicearray)
# some file formats read x, y out of order, transpose
if slicearray.shape[:2] != array.shape[:2]:
slicearray = np.transpose(slicearray)
slicearray = np.transpose(slicearray)
array[:, :, z, c, t] = slicearray
# if series is set to read all channels, read all c simultaneously
elif len(sample.shape) == 3:

# in this case, channels are correctly stored as channels, and we can support multi-level images as series
else:
for z in range(self.shape_list[level][2]):
for t in range(self.shape_list[level][4]):
slicearray = reader.read(
Expand All @@ -409,10 +415,13 @@ def extract_region(self, location, size, level=0):
# some file formats read x, y out of order, transpose
if slicearray.shape[:2] != array.shape[:2]:
slicearray = np.transpose(slicearray)
slicearray = np.moveaxis(slicearray, 0, -1)
array[:, :, z, :, t] = slicearray
else:
raise Exception("image format not supported")
# in 2d undoes transpose
if len(sample.shape) == 3:
slicearray = np.moveaxis(slicearray, 0, -1)
if len(sample.shape) == 3:
array[:, :, z, :, t] = slicearray
else:
array[:, :, z, level, t] = slicearray

array = array.astype(np.uint8)
return array
Expand Down Expand Up @@ -448,7 +457,7 @@ def get_thumbnail(self, size=None):
image_array = zoom(array, ratio)
return image_array

def generate_tiles(self, shape=3000, stride=None, pad=False, level=0):
def generate_tiles(self, shape=3000, stride=None, pad=False, level=0, **kwargs):
"""
Generator over tiles.
Expand Down Expand Up @@ -511,7 +520,7 @@ def generate_tiles(self, shape=3000, stride=None, pad=False, level=0):
if coords[0] + shape[0] < i and coords[1] + shape[1] < j:
# get image for tile
tile_im = self.extract_region(
location=coords, size=shape, level=level
location=coords, size=shape, level=level, **kwargs
)
yield pathml.core.tile.Tile(image=tile_im, coords=coords)
else:
Expand All @@ -520,7 +529,7 @@ def generate_tiles(self, shape=3000, stride=None, pad=False, level=0):
j - coords[1] if coords[1] + shape[1] > j else shape[1],
)
tile_im = self.extract_region(
location=coords, size=unpaddedshape, level=level
location=coords, size=unpaddedshape, level=level, **kwargs
)
zeroarrayshape = list(tile_im.shape)
zeroarrayshape[0], zeroarrayshape[1] = (
Expand Down
13 changes: 11 additions & 2 deletions pathml/core/slide_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ def run(
tile_pad=False,
overwrite_existing_tiles=False,
write_dir=None,
**kwargs,
):
"""
Run a preprocessing pipeline on SlideData.
Expand Down Expand Up @@ -317,7 +318,11 @@ def run(
processed_tile_futures = []

for tile in self.generate_tiles(
level=level, shape=tile_size, stride=tile_stride, pad=tile_pad
level=level,
shape=tile_size,
stride=tile_stride,
pad=tile_pad,
**kwargs,
):
if not tile.slide_type:
tile.slide_type = self.slide_type
Expand All @@ -338,7 +343,11 @@ def run(

else:
for tile in self.generate_tiles(
level=level, shape=tile_size, stride=tile_stride, pad=tile_pad
level=level,
shape=tile_size,
stride=tile_stride,
pad=tile_pad,
**kwargs,
):
if not tile.slide_type:
tile.slide_type = self.slide_type
Expand Down
38 changes: 32 additions & 6 deletions pathml/preprocessing/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,15 @@
import pandas as pd
import pathml.core
import pathml.core.slide_data
import spams
from pathml.utils import (RGB_to_GREY, RGB_to_HSI, RGB_to_HSV, RGB_to_OD,
normalize_matrix_cols)
from pathml.utils import (
RGB_to_GREY,
RGB_to_HSI,
RGB_to_HSV,
RGB_to_OD,
normalize_matrix_cols,
)
from skimage import restoration
from skimage.exposure import (equalize_adapthist, equalize_hist,
rescale_intensity)
from skimage.exposure import equalize_adapthist, equalize_hist, rescale_intensity
from skimage.measure import regionprops_table


Expand Down Expand Up @@ -271,7 +274,10 @@ def F(self, image):
image.ndim == 2
), f"input image has shape {image.shape}. Must convert to 1-channel image (H, W)."
_, out = cv2.threshold(
src=image, thresh=self.threshold, maxval=self.max_value, type=self.type,
src=image,
thresh=self.threshold,
maxval=self.max_value,
type=self.type,
)
return out.astype(np.uint8)

Expand Down Expand Up @@ -597,6 +603,10 @@ class StainNormalizationHE(Transform):
Default can be used, or you can also fit to a reference slide of your choosing by calling
:meth:`~pathml.preprocessing.transforms.StainNormalizationHE.fit_to_reference`.
Note:
If using ``stain_estimation_method = "Vahadane"``, `spams <http://thoth.inrialpes.fr/people/mairal/spams/>`_
must be installed, along with all of its dependencies (i.e. libblas & liblapack).
References:
Macenko, M., Niethammer, M., Marron, J.S., Borland, D., Woosley, J.T., Guan, X., Schmitt, C. and Thomas, N.E.,
2009, June. A method for normalizing histology slides for quantitative analysis. In 2009 IEEE International
Expand Down Expand Up @@ -635,6 +645,14 @@ def __init__(
0 <= background_intensity <= 255
), f"Error: input background intensity {background_intensity} must be an integer between 0 and 255"

if stain_estimation_method.lower() == "vahadane":
try:
import spams
except (ImportError, ModuleNotFoundError):
raise Exception(
"Vahadane method requires `spams` package to be installed"
)

self.target = target.lower()
self.stain_estimation_method = stain_estimation_method.lower()
self.optical_density_threshold = optical_density_threshold
Expand Down Expand Up @@ -723,6 +741,10 @@ def _estimate_stain_vectors_vahadane(self, image, random_seed=0):
Args:
image (np.ndarray): RGB image
"""
try:
import spams
except (ImportError, ModuleNotFoundError):
raise Exception("Vahadane method requires `spams` package to be installed")
# convert to Optical Density (OD) space
image_OD = RGB_to_OD(image)
# reshape to (M*N)x3
Expand Down Expand Up @@ -823,6 +845,10 @@ def _estimate_pixel_concentrations_lasso(self, image, stain_matrix):
stain_matrix (np.ndarray): matrix of H and E stain vectors in optical density (OD) space.
Stain_matrix is (3, 2) and first column corresponds to hematoxylin by convention.
"""
try:
import spams
except (ImportError, ModuleNotFoundError):
raise Exception("Vahadane method requires `spams` package to be installed")
image_OD = RGB_to_OD(image).reshape(-1, 3)

# Get concentrations of each stain at each pixel
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
"openslide-python",
"pydicom",
"h5py",
"spams",
"scikit-learn",
"dask[distributed]",
"anndata>=0.7.6",
Expand Down
9 changes: 7 additions & 2 deletions tests/core_tests/test_slide_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,14 @@ def dicom_backend():


@pytest.mark.parametrize(
"backend", [openslide_backend(), bioformats_backend(), bioformats_backend_qptiff()]
"backend",
[
openslide_backend(),
bioformats_backend(),
bioformats_backend_qptiff(),
],
)
@pytest.mark.parametrize("location", [(0, 0), (50, 100)])
@pytest.mark.parametrize("location", [(0, 0), (50, 60)])
@pytest.mark.parametrize("size", [50, (50, 100)])
@pytest.mark.parametrize("level", [None, 0])
def test_extract_region(backend, location, size, level):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_manuscript_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
[
"https://www.pathml.org",
# Vignettes
# "https://github.com/Dana-Farber-AIOS/pathml/tree/master/examples/vignettes/",
"https://github.com/Dana-Farber-AIOS/pathml/tree/master/examples/vignettes/",
# docs
"https://pathml.readthedocs.io/en/latest/",
],
Expand Down

0 comments on commit df49ac1

Please sign in to comment.