Merge pull request #270 from Dana-Farber-AIOS/dev

v2.0.2
Dana-Farber-AIOS · Jan 6, 2022 · df49ac1 · df49ac1
2 parents a52f632 + e66a1d7
commit df49ac1
Show file tree

Hide file tree

Showing 10 changed files with 107 additions and 64 deletions.
diff --git a/.github/workflows/tests-conda.yml b/.github/workflows/tests-conda.yml
@@ -74,4 +74,5 @@ jobs:
       shell: bash -l {0}
       run: |
         cd docs
+        pip install -r readthedocs-requirements.txt
         make html
diff --git a/Dockerfile b/Dockerfile
@@ -44,7 +44,7 @@ COPY tests/ /opt/pathml/tests
 
 # install pathml and deepcell
 RUN pip3 install --upgrade pip \
-    && pip3 install numpy==1.19.5 \
+    && pip3 install numpy==1.19.5 spams \
     && pip3 install python-bioformats==4.0.0 deepcell /opt/pathml/ pytest
 
 # run tests to verify container

diff --git a/docs/readthedocs-requirements.txt b/docs/readthedocs-requirements.txt
@@ -1,7 +1,7 @@
-sphinx>=3.4.3
-nbsphinx>=0.8.1
-nbsphinx-link>=1.3.0
-sphinx-rtd-theme>=0.5.1
-sphinx-autoapi
-Ipython
-sphinx-copybutton
+sphinx==4.3.2
+nbsphinx==0.8.8
+nbsphinx-link==1.3.0
+sphinx-rtd-theme==1.0.0
+sphinx-autoapi==1.8.4
+ipython==7.30.1
+sphinx-copybutton==0.4.0
diff --git a/environment.yml b/environment.yml
@@ -8,30 +8,24 @@ dependencies:
     - pip==21.2.2
     - python==3.8
     - numpy==1.19.5
-    - scipy==1.7.1
+    - scipy==1.7.3
     - scikit-image==0.18.3
-    - matplotlib==3.1.3
+    - matplotlib==3.5.1
     - python-spams==2.6.1
     - openjdk==8.0.152
-    - pytorch==1.9.0
+    - pytorch==1.10.1
     - h5py==3.1.0
-    - dask==2021.7.1
-    - pydicom==2.1.2
+    - dask==2021.12.0
+    - pydicom==2.2.2
     - pytest==6.2.5
-    - pre-commit==2.13.0
+    - pre-commit==2.16.0
     - coverage==5.5
     - pip:
+        - python-bioformats==4.0.0
+        - python-javabridge==4.0.0
+        - deepcell==0.11.0
         - opencv-contrib-python==4.5.3.56
         - openslide-python==1.1.2
-        - javabridge==1.0.19
-        - python-bioformats==4.0.0
-        - scanpy==1.7.2
-        - anndata==0.7.6
-        - ipython==7.27.0
-        - sphinx==4.2.0
-        - nbsphinx==0.8.7
-        - nbsphinx-link==1.3.0
-        - sphinx-rtd-theme==1.0.0
-        - sphinx-autoapi==1.8.4
-        - sphinx-copybutton==0.4.0
-        - tqdm
+        - scanpy==1.8.2
+        - anndata==0.7.8
+        - tqdm==4.62.3
diff --git a/pathml/core/slide_backends.py b/pathml/core/slide_backends.py
@@ -5,8 +5,13 @@
 
 from io import BytesIO
 from typing import Tuple
+
 import numpy as np
 import openslide
+import pathml.core
+import pathml.core.tile
+from javabridge.jutil import JavaException
+from pathml.utils import pil_to_rgb
 from PIL import Image
 from pydicom.dataset import Dataset
 from pydicom.encaps import get_frame_offsets
@@ -15,11 +20,6 @@
 from pydicom.tag import SequenceDelimiterTag, TupleTag
 from pydicom.uid import UID
 from scipy.ndimage import zoom
-from javabridge.jutil import JavaException
-
-import pathml.core
-import pathml.core.tile
-from pathml.utils import pil_to_rgb
 
 try:
     import bioformats
@@ -310,7 +310,7 @@ def get_image_shape(self, level=None):
             ), f"input level {level} invalid for slide with {self.level_count} levels total"
             return self.shape_list[level][:2]
 
-    def extract_region(self, location, size, level=0):
+    def extract_region(self, location, size, level=0, series_as_channels=False):
         """
         Extract a region of the image. All bioformats images have 5 dimensions representing
         (x, y, z, channel, time). Even if an image does not have multiple z-series or time-series,
@@ -323,14 +323,12 @@ def extract_region(self, location, size, level=0):
             size (Tuple[int, int, ...]): (X,Y) size of each region. If an integer is passed, will convert to a
             tuple of (H, W) and extract a square region. If a tuple with len < 5 is passed, missing
                 dimensions will be retrieved in full.
-            level (int): level from which to extract chunks. Level 0 is highest resolution.
+            level (int): level from which to extract chunks. Level 0 is highest resolution. Defaults to 0.
+            series_as_channels (bool): Whether to treat image series as channels. If ``True``, multi-level images
+                are not supported. Defaults to ``False``.
 
         Returns:
-            np.ndarray: image at the specified region
-
-        Example:
-            Extract 2000x2000 x,y region from upper left corner of 7 channel, 2d fluorescent image.
-            data.slide.extract_region(location = (0,0), size = 2000)
+            np.ndarray: image at the specified region. 5-D array of (x, y, z, c, t)
         """
         if level is None:
             level = 0
@@ -359,6 +357,11 @@ def extract_region(self, location, size, level=0):
             raise ValueError(
                 f"input size {size} invalid. Must be a tuple of integer coordinates of len<2"
             )
+        if series_as_channels:
+            assert (
+                level == 0
+            ), f"Multi-level images not supported with series_as_channels=True. Input 'level={level}' invalid. Use 'level=0'."
+
         javabridge.start_vm(class_path=bioformats.JARS, max_heap_size="100G")
         with bioformats.ImageReader(str(self.filename), perform_init=True) as reader:
             # expand size
@@ -370,32 +373,35 @@ def extract_region(self, location, size, level=0):
             arrayshape = tuple(arrayshape)
             array = np.empty(arrayshape)
 
+            # read a very small region to check whether the image has channels incorrectly stored as series
             sample = reader.read(
                 z=0,
                 t=0,
                 series=level,
                 rescale=False,
-                XYWH=(location[0], location[1], size[0], size[1]),
+                XYWH=(location[0], location[1], 2, 2),
             )
 
-            if len(sample.shape) == 2:
+            # some facilities output images where the channels are incorrectly stored as separate series;
+            # in that case we read the image for each series, then stack the results together as channels
+            if series_as_channels:
                 for z in range(self.shape_list[level][2]):
                     for c in range(self.shape_list[level][3]):
                         for t in range(self.shape_list[level][4]):
                             slicearray = reader.read(
                                 z=z,
                                 t=t,
-                                series=level,
+                                series=c,
                                 rescale=False,
                                 XYWH=(location[0], location[1], size[0], size[1]),
                             )
                             slicearray = np.asarray(slicearray)
                             # some file formats read x, y out of order, transpose
-                            if slicearray.shape[:2] != array.shape[:2]:
-                                slicearray = np.transpose(slicearray)
+                            slicearray = np.transpose(slicearray)
                             array[:, :, z, c, t] = slicearray
-            # if series is set to read all channels, read all c simultaneously
-            elif len(sample.shape) == 3:
+
+            # in this case, channels are correctly stored as channels, and we can support multi-level images as series
+            else:
                 for z in range(self.shape_list[level][2]):
                     for t in range(self.shape_list[level][4]):
                         slicearray = reader.read(
@@ -409,10 +415,13 @@ def extract_region(self, location, size, level=0):
                         # some file formats read x, y out of order, transpose
                         if slicearray.shape[:2] != array.shape[:2]:
                             slicearray = np.transpose(slicearray)
-                            slicearray = np.moveaxis(slicearray, 0, -1)
-                        array[:, :, z, :, t] = slicearray
-            else:
-                raise Exception("image format not supported")
+                            # only move the channel axis back to last for 3-D reads; on a 2-D read moveaxis would just undo the transpose
+                            if len(sample.shape) == 3:
+                                slicearray = np.moveaxis(slicearray, 0, -1)
+                        if len(sample.shape) == 3:
+                            array[:, :, z, :, t] = slicearray
+                        else:
+                            array[:, :, z, level, t] = slicearray
 
         array = array.astype(np.uint8)
         return array
@@ -448,7 +457,7 @@ def get_thumbnail(self, size=None):
             image_array = zoom(array, ratio)
         return image_array
 
-    def generate_tiles(self, shape=3000, stride=None, pad=False, level=0):
+    def generate_tiles(self, shape=3000, stride=None, pad=False, level=0, **kwargs):
         """
         Generator over tiles.
 
@@ -511,7 +520,7 @@ def generate_tiles(self, shape=3000, stride=None, pad=False, level=0):
                 if coords[0] + shape[0] < i and coords[1] + shape[1] < j:
                     # get image for tile
                     tile_im = self.extract_region(
-                        location=coords, size=shape, level=level
+                        location=coords, size=shape, level=level, **kwargs
                     )
                     yield pathml.core.tile.Tile(image=tile_im, coords=coords)
                 else:
@@ -520,7 +529,7 @@ def generate_tiles(self, shape=3000, stride=None, pad=False, level=0):
                         j - coords[1] if coords[1] + shape[1] > j else shape[1],
                     )
                     tile_im = self.extract_region(
-                        location=coords, size=unpaddedshape, level=level
+                        location=coords, size=unpaddedshape, level=level, **kwargs
                     )
                     zeroarrayshape = list(tile_im.shape)
                     zeroarrayshape[0], zeroarrayshape[1] = (

diff --git a/pathml/core/slide_data.py b/pathml/core/slide_data.py
@@ -257,6 +257,7 @@ def run(
         tile_pad=False,
         overwrite_existing_tiles=False,
         write_dir=None,
+        **kwargs,
     ):
         """
         Run a preprocessing pipeline on SlideData.
@@ -317,7 +318,11 @@ def run(
             processed_tile_futures = []
 
             for tile in self.generate_tiles(
-                level=level, shape=tile_size, stride=tile_stride, pad=tile_pad
+                level=level,
+                shape=tile_size,
+                stride=tile_stride,
+                pad=tile_pad,
+                **kwargs,
             ):
                 if not tile.slide_type:
                     tile.slide_type = self.slide_type
@@ -338,7 +343,11 @@ def run(
 
         else:
             for tile in self.generate_tiles(
-                level=level, shape=tile_size, stride=tile_stride, pad=tile_pad
+                level=level,
+                shape=tile_size,
+                stride=tile_stride,
+                pad=tile_pad,
+                **kwargs,
             ):
                 if not tile.slide_type:
                     tile.slide_type = self.slide_type

diff --git a/pathml/preprocessing/transforms.py b/pathml/preprocessing/transforms.py
@@ -12,12 +12,15 @@
 import pandas as pd
 import pathml.core
 import pathml.core.slide_data
-import spams
-from pathml.utils import (RGB_to_GREY, RGB_to_HSI, RGB_to_HSV, RGB_to_OD,
-                          normalize_matrix_cols)
+from pathml.utils import (
+    RGB_to_GREY,
+    RGB_to_HSI,
+    RGB_to_HSV,
+    RGB_to_OD,
+    normalize_matrix_cols,
+)
 from skimage import restoration
-from skimage.exposure import (equalize_adapthist, equalize_hist,
-                              rescale_intensity)
+from skimage.exposure import equalize_adapthist, equalize_hist, rescale_intensity
 from skimage.measure import regionprops_table
 
 
@@ -271,7 +274,10 @@ def F(self, image):
             image.ndim == 2
         ), f"input image has shape {image.shape}. Must convert to 1-channel image (H, W)."
         _, out = cv2.threshold(
-            src=image, thresh=self.threshold, maxval=self.max_value, type=self.type,
+            src=image,
+            thresh=self.threshold,
+            maxval=self.max_value,
+            type=self.type,
         )
         return out.astype(np.uint8)
 
@@ -597,6 +603,10 @@ class StainNormalizationHE(Transform):
             Default can be used, or you can also fit to a reference slide of your choosing by calling
             :meth:`~pathml.preprocessing.transforms.StainNormalizationHE.fit_to_reference`.
 
+    Note:
+        If using ``stain_estimation_method = "Vahadane"``, `spams <http://thoth.inrialpes.fr/people/mairal/spams/>`_
+        must be installed, along with all of its dependencies (i.e. libblas & liblapack).
+
     References:
         Macenko, M., Niethammer, M., Marron, J.S., Borland, D., Woosley, J.T., Guan, X., Schmitt, C. and Thomas, N.E.,
         2009, June. A method for normalizing histology slides for quantitative analysis. In 2009 IEEE International
@@ -635,6 +645,14 @@ def __init__(
             0 <= background_intensity <= 255
         ), f"Error: input background intensity {background_intensity} must be an integer between 0 and 255"
 
+        if stain_estimation_method.lower() == "vahadane":
+            try:
+                import spams
+            except (ImportError, ModuleNotFoundError):
+                raise Exception(
+                    "Vahadane method requires `spams` package to be installed"
+                )
+
         self.target = target.lower()
         self.stain_estimation_method = stain_estimation_method.lower()
         self.optical_density_threshold = optical_density_threshold
@@ -723,6 +741,10 @@ def _estimate_stain_vectors_vahadane(self, image, random_seed=0):
         Args:
             image (np.ndarray): RGB image
         """
+        try:
+            import spams
+        except (ImportError, ModuleNotFoundError):
+            raise Exception("Vahadane method requires `spams` package to be installed")
         # convert to Optical Density (OD) space
         image_OD = RGB_to_OD(image)
         # reshape to (M*N)x3
@@ -823,6 +845,10 @@ def _estimate_pixel_concentrations_lasso(self, image, stain_matrix):
             stain_matrix (np.ndarray): matrix of H and E stain vectors in optical density (OD) space.
                 Stain_matrix is (3, 2) and first column corresponds to hematoxylin by convention.
         """
+        try:
+            import spams
+        except (ImportError, ModuleNotFoundError):
+            raise Exception("Vahadane method requires `spams` package to be installed")
         image_OD = RGB_to_OD(image).reshape(-1, 3)
 
         # Get concentrations of each stain at each pixel

diff --git a/setup.py b/setup.py
@@ -32,7 +32,6 @@
         "openslide-python",
         "pydicom",
         "h5py",
-        "spams",
         "scikit-learn",
         "dask[distributed]",
         "anndata>=0.7.6",

diff --git a/tests/core_tests/test_slide_backends.py b/tests/core_tests/test_slide_backends.py
@@ -30,9 +30,14 @@ def dicom_backend():
 
 
 @pytest.mark.parametrize(
-    "backend", [openslide_backend(), bioformats_backend(), bioformats_backend_qptiff()]
+    "backend",
+    [
+        openslide_backend(),
+        bioformats_backend(),
+        bioformats_backend_qptiff(),
+    ],
 )
-@pytest.mark.parametrize("location", [(0, 0), (50, 100)])
+@pytest.mark.parametrize("location", [(0, 0), (50, 60)])
 @pytest.mark.parametrize("size", [50, (50, 100)])
 @pytest.mark.parametrize("level", [None, 0])
 def test_extract_region(backend, location, size, level):

diff --git a/tests/test_manuscript_urls.py b/tests/test_manuscript_urls.py
@@ -12,7 +12,7 @@
     [
         "https://www.pathml.org",
         # Vignettes
-        # "https://github.com/Dana-Farber-AIOS/pathml/tree/master/examples/vignettes/",
+        "https://github.com/Dana-Farber-AIOS/pathml/tree/master/examples/vignettes/",
         # docs
         "https://pathml.readthedocs.io/en/latest/",
     ],