From 2baa344bce073bd4795275a8cea1dfbac26f8a05 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 12 Apr 2022 01:18:37 +0000 Subject: [PATCH 01/26] Redesign BaseWSIReader, WSIReader, CuCIMWSIReader Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_reader.py | 278 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) create mode 100644 monai/data/wsi_reader.py diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py new file mode 100644 index 0000000000..5b69cf469e --- /dev/null +++ b/monai/data/wsi_reader.py @@ -0,0 +1,278 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import abstractmethod +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union + +import numpy as np + +from monai.config import DtypeLike, PathLike +from monai.data.image_reader import ImageReader +from monai.data.utils import is_supported_format +from monai.transforms.utility.array import EnsureChannelFirst +from monai.utils import ensure_tuple, optional_import, require_pkg + +CuImage, _ = optional_import("cucim", name="CuImage") + +__all__ = ["BaseWSIReader", "WSIReader", "CuCIMWSIReader"] + + +class BaseWSIReader(ImageReader): + """ + An abstract class defines APIs to load whole slide image files. + + Typical usage of an implementation of this class is: + + .. code-block:: python + + image_reader = MyWSIReader() + wsi = image_reader.read(path_to_image) + img_data, meta_data = image_reader.get_data(wsi) + + - The `read` call converts image filenames into image objects, + - The `get_data` call fetches the image data, as well as meta data. + - A reader should implement `verify_suffix` with the logic of checking the input filename + by the filename extensions. + + """ + + supported_formats: List[str] = [] + + def __init__(self, level: int, **kwargs): + super().__init__() + self.level = level + self.kwargs = kwargs + self.metadata: Dict[Any, Any] = {} + + @property + @abstractmethod + def _reader(self): + raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") + + @abstractmethod + def _get_size(self, wsi, level): + raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") + + @abstractmethod + def _get_level_count(self, wsi): + raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") + + @abstractmethod + def _get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike): + raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") + + @abstractmethod + def _get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): + raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") + + def read(self, data: Union[Sequence[PathLike], PathLike, np.ndarray], **kwargs): + """ + Read image data from given file or list of files. + + Args: + data: file name or a list of file names to read. + kwargs: additional args for backend reading API in `read()`, will override `self.kwargs` for existing keys. + more details in `cuCIM`: https://github.com/rapidsai/cucim/blob/v21.12.00/cpp/include/cucim/cuimage.h#L100. + + Returns: + image object or list of image objects + + """ + wsi_list: List = [] + + filenames: Sequence[PathLike] = ensure_tuple(data) + kwargs_ = self.kwargs.copy() + kwargs_.update(kwargs) + for filename in filenames: + wsi = self._reader(filename, **kwargs_) + wsi_list.append(wsi) + + return wsi_list if len(filenames) > 1 else wsi_list[0] + + def get_data( + self, + wsi, + location: Tuple[int, int] = (0, 0), + size: Optional[Tuple[int, int]] = None, + level: Optional[int] = None, + dtype: DtypeLike = np.uint8, + ): + """ + Extract patchs as numpy array from WSI image and return them. + + Args: + wsi: a whole slide image object loaded from a file + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame, + or list of tuples (default=(0, 0)) + size: (height, width) tuple giving the patch size, or list of tuples (default to full image size) + This is the size of image at the given level (`level`) + level: the level number, or list of level numbers (default=0) + dtype: the data type of output image + + Returns: + a tuples, where the first element is an image [CxHxW], and second element is a dictionary of metadata + """ + # Verify magnification level + if level is None: + level = self.level + max_level = self._get_level_count(wsi) - 1 + if level > max_level: + raise ValueError(f"The maximum level of this image is {max_level} while level={level} is requested)!") + + # Verify location + if location is None: + location = (0, 0) + wsi_size = self._get_size(wsi, level) + if location[0] > wsi_size[0] or location[1] > wsi_size[1]: + raise ValueError(f"Location is outside of the image: location={location}, image size={wsi_size}") + + # Verify size + if size is None: + if location != (0, 0): + raise ValueError("Patch size should be defined to exctract patches.") + size = self._get_size(wsi, level) + else: + if size[0] <= 0 or size[1] <= 0: + raise ValueError(f"Patch size should be greater than zero, provided: patch size = {size}") + + # Extract a patch or the entire image + patch = self._get_patch(wsi, location=location, size=size, level=level, dtype=dtype) + + # Verify patch image + patch = self._verify_output(patch) + + # Set patch-related metadata + metadata = self._get_metadata(wsi=wsi, patch=patch, location=location, size=size, level=level) + + return patch, metadata + + def verify_suffix(self, filename: Union[Sequence[PathLike], PathLike]) -> bool: + """ + Verify whether the specified file or files format is supported by WSI reader. + + Args: + filename: file name or a list of file names to read. + if a list of files, verify all the suffixes. + """ + return is_supported_format(filename, self.supported_formats) + + def _verify_output(self, patch: np.ndarray): + """ + Verify image output + """ + # check if the image has three dimensions (2D + color) + if patch.ndim != 3: + raise ValueError( + f"The image dimension should be 3 but has {patch.ndim}. " + "`WSIReader` is designed to work only with 2D colored images." + ) + + # check if the color channel is 3 (RGB) or 4 (RGBA) + if patch.shape[0] not in [3, 4]: + raise ValueError( + f"The image should have three or four color channels but has {patch.shape[0]}. " + "`WSIReader` is designed to work only with 2D colored images." + ) + + # remove alpha channel if exist (RGBA) + if patch.shape[0] > 3: + patch = patch[:3] + + return patch + + +class WSIReader(BaseWSIReader): + def __init__(self, backend="cucim", level: int = 0, **kwargs): + super().__init__(level, **kwargs) + self.backend = backend.lower() + if self.backend == "cucim": + self.backend_lib = CuCIMWSIReader(level=level, **kwargs) + else: + raise ValueError("The supported backends are: cucim") + self.supported_formats = self.backend_lib.supported_formats + + @property + def _reader(self): + return self.backend_lib._reader + + def _get_level_count(self, wsi): + return self.backend_lib._get_level_count(wsi) + + def _get_size(self, wsi, level): + return self.backend_lib._get_size(wsi, level) + + def _get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): + return self.backend_lib._get_metadata(wsi=wsi, patch=patch, size=size, location=location, level=level) + + def _get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike): + return self.backend_lib._get_patch(wsi=wsi, location=location, size=size, level=level, dtype=dtype) + + +@require_pkg(pkg_name="cucim") +class CuCIMWSIReader(BaseWSIReader): + """ + Read whole slide images and extract patches without loading the whole slide image into the memory. + + Args: + level: the whole slide image level at which the image is extracted. (default=0) + This is overridden if the level argument is provided in `get_data`. + kwargs: additional args for backend reading API in `read()`, more details in `cuCIM`: + https://github.com/rapidsai/cucim/blob/v21.12.00/cpp/include/cucim/cuimage.h#L100. + + """ + + supported_formats = ["tif", "tiff", "svs"] + + def __init__(self, level: int = 0, **kwargs): + super().__init__(level, **kwargs) + + @property + def _reader(self): + return CuImage + + @staticmethod + def _get_level_count(wsi): + return wsi.resolutions["level_count"] + + @staticmethod + def _get_size(wsi, level): + return wsi.resolutions["level_dimensions"][level][::-1] + + def _get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): + metadata: Dict = { + "backend": "cucim", + "spatial_shape": np.asarray(patch.shape[1:]), + "original_channel_dim": -1, + "location": location, + "size": size, + "level": level, + } + return metadata + + def _get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike): + """ + Extract a patch based on given output from the given whole slide image + Args: + + Returns: + a numpy array with dimesion of [3xWxH] + """ + # extract a patch (or the entire image) + # reverse the order of location and size to become WxH for cuCIM + patch = wsi.read_region(location=location[::-1], size=size[::-1], level=level) + + # convert to numpy + patch = np.asarray(patch, dtype=dtype) + + # make it channel first + patch = EnsureChannelFirst()(patch, {"original_channel_dim": -1}) + + return patch From e46fea00a6ef5880bfc3735f47692a1548e29f0c Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 12 Apr 2022 01:18:45 +0000 Subject: [PATCH 02/26] Add unittests for WSIReader Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_wsireader_new.py | 190 ++++++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 tests/test_wsireader_new.py diff --git a/tests/test_wsireader_new.py b/tests/test_wsireader_new.py new file mode 100644 index 0000000000..26f175b7ae --- /dev/null +++ b/tests/test_wsireader_new.py @@ -0,0 +1,190 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest +from unittest import skipUnless + +import numpy as np +import torch +from numpy.testing import assert_array_equal +from parameterized import parameterized + +from monai.data import DataLoader, Dataset +from monai.data.wsi_reader import WSIReader +from monai.transforms import Compose, LoadImaged, ToTensord +from monai.utils import first, optional_import +from monai.utils.enums import PostFix +from tests.utils import download_url_or_skip_test, testing_data_config + +cucim, has_cucim = optional_import("cucim") +has_cucim = has_cucim and hasattr(cucim, "CuImage") +openslide, has_osl = optional_import("openslide") +imwrite, has_tiff = optional_import("tifffile", name="imwrite") +_, has_codec = optional_import("imagecodecs") +has_tiff = has_tiff and has_codec + +FILE_KEY = "wsi_img" +FILE_URL = testing_data_config("images", FILE_KEY, "url") +base_name, extension = os.path.basename(f"{FILE_URL}"), ".tiff" +FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + base_name + extension) + +HEIGHT = 32914 +WIDTH = 46000 + +TEST_CASE_0 = [FILE_PATH, 2, (3, HEIGHT // 4, WIDTH // 4)] + +TEST_CASE_TRANSFORM_0 = [FILE_PATH, 4, (HEIGHT // 16, WIDTH // 16), (1, 3, HEIGHT // 16, WIDTH // 16)] + +TEST_CASE_1 = [ + FILE_PATH, + {"location": (HEIGHT // 2, WIDTH // 2), "size": (2, 1), "level": 0}, + np.array([[[246], [246]], [[246], [246]], [[246], [246]]]), +] + +TEST_CASE_2 = [ + FILE_PATH, + {"location": (0, 0), "size": (2, 1), "level": 2}, + np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), +] + + +TEST_CASE_RGB_0 = [np.ones((3, 2, 2), dtype=np.uint8)] # CHW + +TEST_CASE_RGB_1 = [np.ones((3, 100, 100), dtype=np.uint8)] # CHW + +TEST_CASE_ERROR_GRAY = [np.ones((16, 16), dtype=np.uint8)] # no color channel +TEST_CASE_ERROR_3D = [np.ones((16, 16, 16, 3), dtype=np.uint8)] # 3D + color + + +def save_rgba_tiff(array: np.ndarray, filename: str, mode: str): + """ + Save numpy array into a TIFF RGB/RGBA file + + Args: + array: numpy ndarray with the shape of CxHxW and C==3 representing a RGB image + filename: the filename to be used for the tiff file. '_RGB.tiff' or '_RGBA.tiff' will be appended to this filename. + mode: RGB or RGBA + """ + if mode == "RGBA": + array = np.concatenate([array, 255 * np.ones_like(array[0])[np.newaxis]]).astype(np.uint8) + + img_rgb = array.transpose(1, 2, 0) + imwrite(filename, img_rgb, shape=img_rgb.shape, tile=(16, 16)) + + return filename + + +def save_gray_tiff(array: np.ndarray, filename: str): + """ + Save numpy array into a TIFF file + + Args: + array: numpy ndarray with any shape + filename: the filename to be used for the tiff file. + """ + img_gray = array + imwrite(filename, img_gray, shape=img_gray.shape, photometric="rgb") + + return filename + + +@skipUnless(has_cucim or has_osl or has_tiff, "Requires cucim, openslide, or tifffile!") +def setUpModule(): # noqa: N802 + hash_type = testing_data_config("images", FILE_KEY, "hash_type") + hash_val = testing_data_config("images", FILE_KEY, "hash_val") + download_url_or_skip_test(FILE_URL, FILE_PATH, hash_type=hash_type, hash_val=hash_val) + + +class WSIReaderTests: + class Tests(unittest.TestCase): + backend = None + + @parameterized.expand([TEST_CASE_0]) + def test_read_whole_image(self, file_path, level, expected_shape): + reader = WSIReader(self.backend, level=level) + with reader.read(file_path) as img_obj: + img = reader.get_data(img_obj)[0] + self.assertTupleEqual(img.shape, expected_shape) + + @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) + def test_read_region(self, file_path, patch_info, expected_img): + kwargs = {"name": None, "offset": None} if self.backend == "tifffile" else {} + reader = WSIReader(self.backend, **kwargs) + with reader.read(file_path, **kwargs) as img_obj: + if self.backend == "tifffile": + with self.assertRaises(ValueError): + reader.get_data(img_obj, **patch_info)[0] + else: + # Read twice to check multiple calls + img = reader.get_data(img_obj, **patch_info)[0] + img2 = reader.get_data(img_obj, **patch_info)[0] + self.assertTupleEqual(img.shape, img2.shape) + self.assertIsNone(assert_array_equal(img, img2)) + self.assertTupleEqual(img.shape, expected_img.shape) + self.assertIsNone(assert_array_equal(img, expected_img)) + + @parameterized.expand([TEST_CASE_RGB_0, TEST_CASE_RGB_1]) + @skipUnless(has_tiff, "Requires tifffile.") + def test_read_rgba(self, img_expected): + # skip for OpenSlide since not working with images without tiles + if self.backend == "openslide": + return + image = {} + reader = WSIReader(self.backend) + for mode in ["RGB", "RGBA"]: + file_path = save_rgba_tiff( + img_expected, + os.path.join(os.path.dirname(__file__), "testing_data", f"temp_tiff_image_{mode}.tiff"), + mode=mode, + ) + with reader.read(file_path) as img_obj: + image[mode], _ = reader.get_data(img_obj) + + self.assertIsNone(assert_array_equal(image["RGB"], img_expected)) + self.assertIsNone(assert_array_equal(image["RGBA"], img_expected)) + + @parameterized.expand([TEST_CASE_ERROR_GRAY, TEST_CASE_ERROR_3D]) + @skipUnless(has_tiff, "Requires tifffile.") + def test_read_malformats(self, img_expected): + reader = WSIReader(self.backend) + file_path = save_gray_tiff( + img_expected, os.path.join(os.path.dirname(__file__), "testing_data", "temp_tiff_image_gray.tiff") + ) + with self.assertRaises((RuntimeError, ValueError, openslide.OpenSlideError if has_osl else ValueError)): + with reader.read(file_path) as img_obj: + reader.get_data(img_obj) + + @parameterized.expand([TEST_CASE_TRANSFORM_0]) + def test_with_dataloader(self, file_path, level, expected_spatial_shape, expected_shape): + train_transform = Compose( + [ + LoadImaged(keys=["image"], reader=WSIReader, backend=self.backend, level=level), + ToTensord(keys=["image"]), + ] + ) + dataset = Dataset([{"image": file_path}], transform=train_transform) + data_loader = DataLoader(dataset) + data: dict = first(data_loader) + for s in data[PostFix.meta("image")]["spatial_shape"]: + torch.testing.assert_allclose(s, expected_spatial_shape) + self.assertTupleEqual(data["image"].shape, expected_shape) + + +@skipUnless(has_cucim, "Requires cucim") +class TestCuCIM(WSIReaderTests.Tests): + @classmethod + def setUpClass(cls): + cls.backend = "cucim" + + +if __name__ == "__main__": + unittest.main() From f946a631d98c2028b00ec467cd6892671fd6bcee Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 12 Apr 2022 20:35:03 +0000 Subject: [PATCH 03/26] Add image mode for output validation Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_reader.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index 5b69cf469e..94e7c98c3d 100644 --- a/monai/data/wsi_reader.py +++ b/monai/data/wsi_reader.py @@ -104,6 +104,7 @@ def get_data( size: Optional[Tuple[int, int]] = None, level: Optional[int] = None, dtype: DtypeLike = np.uint8, + mode: str = "RGB", ): """ Extract patchs as numpy array from WSI image and return them. @@ -147,7 +148,7 @@ def get_data( patch = self._get_patch(wsi, location=location, size=size, level=level, dtype=dtype) # Verify patch image - patch = self._verify_output(patch) + patch = self._verify_output(patch, mode) # Set patch-related metadata metadata = self._get_metadata(wsi=wsi, patch=patch, location=location, size=size, level=level) @@ -164,7 +165,7 @@ def verify_suffix(self, filename: Union[Sequence[PathLike], PathLike]) -> bool: """ return is_supported_format(filename, self.supported_formats) - def _verify_output(self, patch: np.ndarray): + def _verify_output(self, patch: np.ndarray, mode: str): """ Verify image output """ @@ -172,18 +173,20 @@ def _verify_output(self, patch: np.ndarray): if patch.ndim != 3: raise ValueError( f"The image dimension should be 3 but has {patch.ndim}. " - "`WSIReader` is designed to work only with 2D colored images." + "`WSIReader` is designed to work only with 2D images with color channel." ) # check if the color channel is 3 (RGB) or 4 (RGBA) - if patch.shape[0] not in [3, 4]: + if mode == "RGBA" and patch.shape[0] != 4: raise ValueError( - f"The image should have three or four color channels but has {patch.shape[0]}. " - "`WSIReader` is designed to work only with 2D colored images." + f"The image is expected to have four color channels in '{mode}' mode but has {patch.shape[0]}." ) - # remove alpha channel if exist (RGBA) - if patch.shape[0] > 3: + if mode in "RGB": + if patch.shape[0] not in [3, 4]: + raise ValueError( + f"The image is expected to have three or four color channels in '{mode}' mode but has {patch.shape[0]}. " + ) patch = patch[:3] return patch From 9c15ea31074b38dae889d328be8a7f3504d4162e Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 13 Apr 2022 13:08:16 +0000 Subject: [PATCH 04/26] Update docs Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- docs/source/data.rst | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/docs/source/data.rst b/docs/source/data.rst index 0910001783..dcb0cad2d7 100644 --- a/docs/source/data.rst +++ b/docs/source/data.rst @@ -152,9 +152,22 @@ PILReader .. autoclass:: PILReader :members: +Whole slide image reader +------------------------ + +BaseWSIReader +~~~~~~~~~~~ +.. autoclass:: BaseWSIReader + :members: + WSIReader -~~~~~~~~~ -.. autoclass:: WSIReader +~~~~~~~~~~~ +.. autoclass:: Reader + :members: + +CuCIMWSIReader +~~~~~~~~~~~ +.. autoclass:: CuCIMWSIReader :members: Image writer From e00453805728c4f97e09277f32f3187ca411c153 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 13 Apr 2022 13:09:00 +0000 Subject: [PATCH 05/26] Update references to new WSIReader Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/apps/pathology/data/datasets.py | 2 +- monai/apps/pathology/metrics/lesion_froc.py | 2 +- monai/data/__init__.py | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/monai/apps/pathology/data/datasets.py b/monai/apps/pathology/data/datasets.py index 71f3214ea4..756223a784 100644 --- a/monai/apps/pathology/data/datasets.py +++ b/monai/apps/pathology/data/datasets.py @@ -16,7 +16,7 @@ import numpy as np from monai.data import Dataset, SmartCacheDataset -from monai.data.image_reader import WSIReader +from monai.data.wsi_reader import WSIReader from monai.utils import ensure_tuple_rep __all__ = ["PatchWSIDataset", "SmartCachePatchWSIDataset", "MaskedInferenceWSIDataset"] diff --git a/monai/apps/pathology/metrics/lesion_froc.py b/monai/apps/pathology/metrics/lesion_froc.py index 6073bd0cda..e48f2128fe 100644 --- a/monai/apps/pathology/metrics/lesion_froc.py +++ b/monai/apps/pathology/metrics/lesion_froc.py @@ -14,7 +14,7 @@ import numpy as np from monai.apps.pathology.utils import PathologyProbNMS, compute_isolated_tumor_cells, compute_multi_instance_mask -from monai.data.image_reader import WSIReader +from monai.data.wsi_reader import WSIReader from monai.metrics import compute_fp_tp_probs, compute_froc_curve_data, compute_froc_score from monai.utils import min_version, optional_import diff --git a/monai/data/__init__.py b/monai/data/__init__.py index cdab2a1037..247b58ffd1 100644 --- a/monai/data/__init__.py +++ b/monai/data/__init__.py @@ -34,7 +34,8 @@ from .folder_layout import FolderLayout from .grid_dataset import GridPatchDataset, PatchDataset, PatchIter, PatchIterd from .image_dataset import ImageDataset -from .image_reader import ImageReader, ITKReader, NibabelReader, NumpyReader, PILReader, WSIReader +from .image_reader import ImageReader, ITKReader, NibabelReader, NumpyReader, PILReader +from .wsi_reader import WSIReader, CuCIMWSIReader from .image_writer import ( SUPPORTED_WRITERS, ImageWriter, From df0a61e61debdb95a497b65e435085c19878bd6f Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 13 Apr 2022 13:09:31 +0000 Subject: [PATCH 06/26] Remove legacy WSIReader Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/image_reader.py | 264 +------------------------------------ 1 file changed, 1 insertion(+), 263 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index ca77178e0b..41bf92c2ed 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -39,7 +39,7 @@ CuImage, _ = optional_import("cucim", name="CuImage") TiffFile, _ = optional_import("tifffile", name="TiffFile") -__all__ = ["ImageReader", "ITKReader", "NibabelReader", "NumpyReader", "PILReader", "WSIReader"] +__all__ = ["ImageReader", "ITKReader", "NibabelReader", "NumpyReader", "PILReader"] class ImageReader(ABC): @@ -714,265 +714,3 @@ def _get_spatial_shape(self, img): img: a PIL Image object loaded from an image file. """ return np.asarray((img.width, img.height)) - - -class WSIReader(ImageReader): - """ - Read whole slide images and extract patches. - - Args: - backend: backend library to load the images, available options: "cuCIM", "OpenSlide" and "TiffFile". - level: the whole slide image level at which the image is extracted. (default=0) - This is overridden if the level argument is provided in `get_data`. - kwargs: additional args for backend reading API in `read()`, more details in `cuCIM`, `TiffFile`, `OpenSlide`: - https://github.com/rapidsai/cucim/blob/v21.12.00/cpp/include/cucim/cuimage.h#L100. - https://github.com/cgohlke/tifffile. - https://openslide.org/api/python/#openslide.OpenSlide. - - Note: - While "cuCIM" and "OpenSlide" backends both can load patches from large whole slide images - without loading the entire image into memory, "TiffFile" backend needs to load the entire image into memory - before extracting any patch; thus, memory consideration is needed when using "TiffFile" backend for - patch extraction. - - """ - - def __init__(self, backend: str = "OpenSlide", level: int = 0, **kwargs): - super().__init__() - self.backend = backend.lower() - func = require_pkg(self.backend)(self._set_reader) - self.wsi_reader = func(self.backend) - self.level = level - self.kwargs = kwargs - - @staticmethod - def _set_reader(backend: str): - if backend == "openslide": - return OpenSlide - if backend == "cucim": - return CuImage - if backend == "tifffile": - return TiffFile - raise ValueError("`backend` should be 'cuCIM', 'OpenSlide' or 'TiffFile'.") - - def verify_suffix(self, filename: Union[Sequence[PathLike], PathLike]) -> bool: - """ - Verify whether the specified file or files format is supported by WSI reader. - - Args: - filename: file name or a list of file names to read. - if a list of files, verify all the suffixes. - """ - return is_supported_format(filename, ["tif", "tiff"]) - - def read(self, data: Union[Sequence[PathLike], PathLike, np.ndarray], **kwargs): - """ - Read image data from given file or list of files. - - Args: - data: file name or a list of file names to read. - kwargs: additional args for backend reading API in `read()`, will override `self.kwargs` for existing keys. - more details in `cuCIM`, `TiffFile`, `OpenSlide`: - https://github.com/rapidsai/cucim/blob/v21.12.00/cpp/include/cucim/cuimage.h#L100. - https://github.com/cgohlke/tifffile. - https://openslide.org/api/python/#openslide.OpenSlide. - - Returns: - image object or list of image objects - - """ - img_: List = [] - - filenames: Sequence[PathLike] = ensure_tuple(data) - kwargs_ = self.kwargs.copy() - kwargs_.update(kwargs) - for name in filenames: - img = self.wsi_reader(name, **kwargs_) - if self.backend == "openslide": - img.shape = (img.dimensions[1], img.dimensions[0], 3) - img_.append(img) - - return img_ if len(filenames) > 1 else img_[0] - - def get_data( - self, - img, - location: Tuple[int, int] = (0, 0), - size: Optional[Tuple[int, int]] = None, - level: Optional[int] = None, - dtype: DtypeLike = np.uint8, - grid_shape: Tuple[int, int] = (1, 1), - patch_size: Optional[Union[int, Tuple[int, int]]] = None, - ): - """ - Extract regions as numpy array from WSI image and return them. - - Args: - img: a WSIReader image object loaded from a file, or list of CuImage objects - location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame, - or list of tuples (default=(0, 0)) - size: (height, width) tuple giving the region size, or list of tuples (default to full image size) - This is the size of image at the given level (`level`) - level: the level number, or list of level numbers (default=0) - dtype: the data type of output image - grid_shape: (row, columns) tuple define a grid to extract patches on that - patch_size: (height, width) the size of extracted patches at the given level - """ - # Verify inputs - if level is None: - level = self.level - max_level = self._get_max_level(img) - if level > max_level: - raise ValueError(f"The maximum level of this image is {max_level} while level={level} is requested)!") - - # Extract a region or the entire image - region = self._extract_region(img, location=location, size=size, level=level, dtype=dtype) - - # Add necessary metadata - metadata: Dict = {} - metadata["spatial_shape"] = np.asarray(region.shape[:-1]) - metadata["original_channel_dim"] = -1 - - # Make it channel first - region = EnsureChannelFirst()(region, metadata) - - # Split into patches - if patch_size is None: - patches = region - else: - tuple_patch_size = ensure_tuple_rep(patch_size, 2) - patches = self._extract_patches( - region, patch_size=tuple_patch_size, grid_shape=grid_shape, dtype=dtype # type: ignore - ) - - return patches, metadata - - def _get_max_level(self, img_obj): - """ - Return the maximum number of levels in the whole slide image - Args: - img: the whole slide image object - - """ - if self.backend == "openslide": - return img_obj.level_count - 1 - if self.backend == "cucim": - return img_obj.resolutions["level_count"] - 1 - if self.backend == "tifffile": - return len(img_obj.pages) - 1 - - def _get_image_size(self, img, size, level, location): - """ - Calculate the maximum region size for the given level and starting location (if size is None). - Note that region size in OpenSlide and cuCIM are WxH (but the final image output would be HxW) - """ - if size is not None: - return size[::-1] - - max_size = [] - downsampling_factor = [] - if self.backend == "openslide": - downsampling_factor = img.level_downsamples[level] - max_size = img.level_dimensions[level] - elif self.backend == "cucim": - downsampling_factor = img.resolutions["level_downsamples"][level] - max_size = img.resolutions["level_dimensions"][level] - - # subtract the top left corner of the patch (at given level) from maximum size - location_at_level = (round(location[1] / downsampling_factor), round(location[0] / downsampling_factor)) - size = [max_size[i] - location_at_level[i] for i in range(len(max_size))] - - return size - - def _extract_region( - self, - img_obj, - size: Optional[Tuple[int, int]], - location: Tuple[int, int] = (0, 0), - level: int = 0, - dtype: DtypeLike = np.uint8, - ): - if self.backend == "tifffile": - # Read the entire image - if size is not None: - raise ValueError( - f"TiffFile backend reads the entire image only, so size '{size}'' should not be provided!", - "For more flexibility or extracting regions, please use cuCIM or OpenSlide backend.", - ) - if location != (0, 0): - raise ValueError( - f"TiffFile backend reads the entire image only, so location '{location}' should not be provided!", - "For more flexibility and extracting regions, please use cuCIM or OpenSlide backend.", - ) - region = img_obj.asarray(level=level) - else: - # Get region size to be extracted - region_size = self._get_image_size(img_obj, size, level, location) - # reverse the order of location's dimensions to become WxH (for cuCIM and OpenSlide) - region_location = location[::-1] - # Extract a region (or the entire image) - region = img_obj.read_region(location=region_location, size=region_size, level=level) - - region = self.convert_to_rgb_array(region, dtype) - return region - - def convert_to_rgb_array(self, raw_region, dtype: DtypeLike = np.uint8): - """Convert to RGB mode and numpy array""" - if self.backend == "openslide": - # convert to RGB - raw_region = raw_region.convert("RGB") - - # convert to numpy (if not already in numpy) - raw_region = np.asarray(raw_region, dtype=dtype) - - # check if the image has three dimensions (2D + color) - if raw_region.ndim != 3: - raise ValueError( - f"The input image dimension should be 3 but {raw_region.ndim} is given. " - "`WSIReader` is designed to work only with 2D colored images." - ) - - # check if the color channel is 3 (RGB) or 4 (RGBA) - if raw_region.shape[-1] not in [3, 4]: - raise ValueError( - f"There should be three or four color channels but {raw_region.shape[-1]} is given. " - "`WSIReader` is designed to work only with 2D colored images." - ) - - # remove alpha channel if exist (RGBA) - if raw_region.shape[-1] > 3: - raw_region = raw_region[..., :3] - - return raw_region - - def _extract_patches( - self, - region: np.ndarray, - grid_shape: Tuple[int, int] = (1, 1), - patch_size: Optional[Tuple[int, int]] = None, - dtype: DtypeLike = np.uint8, - ): - if patch_size is None and grid_shape == (1, 1): - return region - - n_patches = grid_shape[0] * grid_shape[1] - region_size = region.shape[1:] - - if patch_size is None: - patch_size = (region_size[0] // grid_shape[0], region_size[1] // grid_shape[1]) - - # split the region into patches on the grid and center crop them to patch size - flat_patch_grid = np.zeros((n_patches, 3, patch_size[0], patch_size[1]), dtype=dtype) - start_points = [ - np.round(region_size[i] * (0.5 + np.arange(grid_shape[i])) / grid_shape[i] - patch_size[i] / 2).astype(int) - for i in range(2) - ] - idx = 0 - for y_start in start_points[1]: - for x_start in start_points[0]: - x_end = x_start + patch_size[0] - y_end = y_start + patch_size[1] - flat_patch_grid[idx] = region[:, x_start:x_end, y_start:y_end] - idx += 1 - - return flat_patch_grid From b64c087330742f4badc55048063a7fe560f4d371 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 13 Apr 2022 13:11:16 +0000 Subject: [PATCH 07/26] Update unittests Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_wsireader.py | 53 +--------- tests/test_wsireader_new.py | 190 ------------------------------------ 2 files changed, 2 insertions(+), 241 deletions(-) delete mode 100644 tests/test_wsireader_new.py diff --git a/tests/test_wsireader.py b/tests/test_wsireader.py index 6ee02143b8..26f175b7ae 100644 --- a/tests/test_wsireader.py +++ b/tests/test_wsireader.py @@ -19,7 +19,7 @@ from parameterized import parameterized from monai.data import DataLoader, Dataset -from monai.data.image_reader import WSIReader +from monai.data.wsi_reader import WSIReader from monai.transforms import Compose, LoadImaged, ToTensord from monai.utils import first, optional_import from monai.utils.enums import PostFix @@ -56,29 +56,6 @@ np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), ] -TEST_CASE_3 = [ - FILE_PATH, - {"location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 2}, - np.array( - [ - [[[239, 239], [239, 239]], [[239, 239], [239, 239]], [[239, 239], [239, 239]]], - [[[242, 242], [242, 243]], [[242, 242], [242, 243]], [[242, 242], [242, 243]]], - ] - ), -] - -TEST_CASE_4 = [ - FILE_PATH, - {"location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, - np.array([[[[239]], [[239]], [[239]]], [[[243]], [[243]], [[243]]]]), -] - -TEST_CASE_5 = [ - FILE_PATH, - {"location": (HEIGHT - 2, WIDTH - 2), "level": 0, "grid_shape": (1, 1)}, - np.array([[[239, 239], [239, 239]], [[239, 239], [239, 239]], [[237, 237], [237, 237]]]), -] - TEST_CASE_RGB_0 = [np.ones((3, 2, 2), dtype=np.uint8)] # CHW @@ -138,7 +115,7 @@ def test_read_whole_image(self, file_path, level, expected_shape): img = reader.get_data(img_obj)[0] self.assertTupleEqual(img.shape, expected_shape) - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_5]) + @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) def test_read_region(self, file_path, patch_info, expected_img): kwargs = {"name": None, "offset": None} if self.backend == "tifffile" else {} reader = WSIReader(self.backend, **kwargs) @@ -155,18 +132,6 @@ def test_read_region(self, file_path, patch_info, expected_img): self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) - @parameterized.expand([TEST_CASE_3, TEST_CASE_4]) - def test_read_patches(self, file_path, patch_info, expected_img): - reader = WSIReader(self.backend) - with reader.read(file_path) as img_obj: - if self.backend == "tifffile": - with self.assertRaises(ValueError): - reader.get_data(img_obj, **patch_info)[0] - else: - img = reader.get_data(img_obj, **patch_info)[0] - self.assertTupleEqual(img.shape, expected_img.shape) - self.assertIsNone(assert_array_equal(img, expected_img)) - @parameterized.expand([TEST_CASE_RGB_0, TEST_CASE_RGB_1]) @skipUnless(has_tiff, "Requires tifffile.") def test_read_rgba(self, img_expected): @@ -221,19 +186,5 @@ def setUpClass(cls): cls.backend = "cucim" -@skipUnless(has_osl, "Requires OpenSlide") -class TestOpenSlide(WSIReaderTests.Tests): - @classmethod - def setUpClass(cls): - cls.backend = "openslide" - - -@skipUnless(has_tiff, "Requires TiffFile") -class TestTiffFile(WSIReaderTests.Tests): - @classmethod - def setUpClass(cls): - cls.backend = "tifffile" - - if __name__ == "__main__": unittest.main() diff --git a/tests/test_wsireader_new.py b/tests/test_wsireader_new.py deleted file mode 100644 index 26f175b7ae..0000000000 --- a/tests/test_wsireader_new.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (c) MONAI Consortium -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import unittest -from unittest import skipUnless - -import numpy as np -import torch -from numpy.testing import assert_array_equal -from parameterized import parameterized - -from monai.data import DataLoader, Dataset -from monai.data.wsi_reader import WSIReader -from monai.transforms import Compose, LoadImaged, ToTensord -from monai.utils import first, optional_import -from monai.utils.enums import PostFix -from tests.utils import download_url_or_skip_test, testing_data_config - -cucim, has_cucim = optional_import("cucim") -has_cucim = has_cucim and hasattr(cucim, "CuImage") -openslide, has_osl = optional_import("openslide") -imwrite, has_tiff = optional_import("tifffile", name="imwrite") -_, has_codec = optional_import("imagecodecs") -has_tiff = has_tiff and has_codec - -FILE_KEY = "wsi_img" -FILE_URL = testing_data_config("images", FILE_KEY, "url") -base_name, extension = os.path.basename(f"{FILE_URL}"), ".tiff" -FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + base_name + extension) - -HEIGHT = 32914 -WIDTH = 46000 - -TEST_CASE_0 = [FILE_PATH, 2, (3, HEIGHT // 4, WIDTH // 4)] - -TEST_CASE_TRANSFORM_0 = [FILE_PATH, 4, (HEIGHT // 16, WIDTH // 16), (1, 3, HEIGHT // 16, WIDTH // 16)] - -TEST_CASE_1 = [ - FILE_PATH, - {"location": (HEIGHT // 2, WIDTH // 2), "size": (2, 1), "level": 0}, - np.array([[[246], [246]], [[246], [246]], [[246], [246]]]), -] - -TEST_CASE_2 = [ - FILE_PATH, - {"location": (0, 0), "size": (2, 1), "level": 2}, - np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), -] - - -TEST_CASE_RGB_0 = [np.ones((3, 2, 2), dtype=np.uint8)] # CHW - -TEST_CASE_RGB_1 = [np.ones((3, 100, 100), dtype=np.uint8)] # CHW - -TEST_CASE_ERROR_GRAY = [np.ones((16, 16), dtype=np.uint8)] # no color channel -TEST_CASE_ERROR_3D = [np.ones((16, 16, 16, 3), dtype=np.uint8)] # 3D + color - - -def save_rgba_tiff(array: np.ndarray, filename: str, mode: str): - """ - Save numpy array into a TIFF RGB/RGBA file - - Args: - array: numpy ndarray with the shape of CxHxW and C==3 representing a RGB image - filename: the filename to be used for the tiff file. '_RGB.tiff' or '_RGBA.tiff' will be appended to this filename. - mode: RGB or RGBA - """ - if mode == "RGBA": - array = np.concatenate([array, 255 * np.ones_like(array[0])[np.newaxis]]).astype(np.uint8) - - img_rgb = array.transpose(1, 2, 0) - imwrite(filename, img_rgb, shape=img_rgb.shape, tile=(16, 16)) - - return filename - - -def save_gray_tiff(array: np.ndarray, filename: str): - """ - Save numpy array into a TIFF file - - Args: - array: numpy ndarray with any shape - filename: the filename to be used for the tiff file. - """ - img_gray = array - imwrite(filename, img_gray, shape=img_gray.shape, photometric="rgb") - - return filename - - -@skipUnless(has_cucim or has_osl or has_tiff, "Requires cucim, openslide, or tifffile!") -def setUpModule(): # noqa: N802 - hash_type = testing_data_config("images", FILE_KEY, "hash_type") - hash_val = testing_data_config("images", FILE_KEY, "hash_val") - download_url_or_skip_test(FILE_URL, FILE_PATH, hash_type=hash_type, hash_val=hash_val) - - -class WSIReaderTests: - class Tests(unittest.TestCase): - backend = None - - @parameterized.expand([TEST_CASE_0]) - def test_read_whole_image(self, file_path, level, expected_shape): - reader = WSIReader(self.backend, level=level) - with reader.read(file_path) as img_obj: - img = reader.get_data(img_obj)[0] - self.assertTupleEqual(img.shape, expected_shape) - - @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) - def test_read_region(self, file_path, patch_info, expected_img): - kwargs = {"name": None, "offset": None} if self.backend == "tifffile" else {} - reader = WSIReader(self.backend, **kwargs) - with reader.read(file_path, **kwargs) as img_obj: - if self.backend == "tifffile": - with self.assertRaises(ValueError): - reader.get_data(img_obj, **patch_info)[0] - else: - # Read twice to check multiple calls - img = reader.get_data(img_obj, **patch_info)[0] - img2 = reader.get_data(img_obj, **patch_info)[0] - self.assertTupleEqual(img.shape, img2.shape) - self.assertIsNone(assert_array_equal(img, img2)) - self.assertTupleEqual(img.shape, expected_img.shape) - self.assertIsNone(assert_array_equal(img, expected_img)) - - @parameterized.expand([TEST_CASE_RGB_0, TEST_CASE_RGB_1]) - @skipUnless(has_tiff, "Requires tifffile.") - def test_read_rgba(self, img_expected): - # skip for OpenSlide since not working with images without tiles - if self.backend == "openslide": - return - image = {} - reader = WSIReader(self.backend) - for mode in ["RGB", "RGBA"]: - file_path = save_rgba_tiff( - img_expected, - os.path.join(os.path.dirname(__file__), "testing_data", f"temp_tiff_image_{mode}.tiff"), - mode=mode, - ) - with reader.read(file_path) as img_obj: - image[mode], _ = reader.get_data(img_obj) - - self.assertIsNone(assert_array_equal(image["RGB"], img_expected)) - self.assertIsNone(assert_array_equal(image["RGBA"], img_expected)) - - @parameterized.expand([TEST_CASE_ERROR_GRAY, TEST_CASE_ERROR_3D]) - @skipUnless(has_tiff, "Requires tifffile.") - def test_read_malformats(self, img_expected): - reader = WSIReader(self.backend) - file_path = save_gray_tiff( - img_expected, os.path.join(os.path.dirname(__file__), "testing_data", "temp_tiff_image_gray.tiff") - ) - with self.assertRaises((RuntimeError, ValueError, openslide.OpenSlideError if has_osl else ValueError)): - with reader.read(file_path) as img_obj: - reader.get_data(img_obj) - - @parameterized.expand([TEST_CASE_TRANSFORM_0]) - def test_with_dataloader(self, file_path, level, expected_spatial_shape, expected_shape): - train_transform = Compose( - [ - LoadImaged(keys=["image"], reader=WSIReader, backend=self.backend, level=level), - ToTensord(keys=["image"]), - ] - ) - dataset = Dataset([{"image": file_path}], transform=train_transform) - data_loader = DataLoader(dataset) - data: dict = first(data_loader) - for s in data[PostFix.meta("image")]["spatial_shape"]: - torch.testing.assert_allclose(s, expected_spatial_shape) - self.assertTupleEqual(data["image"].shape, expected_shape) - - -@skipUnless(has_cucim, "Requires cucim") -class TestCuCIM(WSIReaderTests.Tests): - @classmethod - def setUpClass(cls): - cls.backend = "cucim" - - -if __name__ == "__main__": - unittest.main() From 7f09e7364f4d8348582542414ee8c2c7d9fe3e33 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 13 Apr 2022 13:14:05 +0000 Subject: [PATCH 08/26] Update docs Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- docs/source/data.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/data.rst b/docs/source/data.rst index dcb0cad2d7..d9c6ed0881 100644 --- a/docs/source/data.rst +++ b/docs/source/data.rst @@ -156,17 +156,17 @@ Whole slide image reader ------------------------ BaseWSIReader -~~~~~~~~~~~ +~~~~~~~~~~~~~ .. autoclass:: BaseWSIReader :members: WSIReader -~~~~~~~~~~~ +~~~~~~~~~ .. autoclass:: Reader :members: CuCIMWSIReader -~~~~~~~~~~~ +~~~~~~~~~~~~~~ .. autoclass:: CuCIMWSIReader :members: From d95e1c2464a2cba6f2262a055b326408553062a5 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 13 Apr 2022 13:18:32 +0000 Subject: [PATCH 09/26] sort imports Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/data/__init__.py b/monai/data/__init__.py index 247b58ffd1..8544fc2398 100644 --- a/monai/data/__init__.py +++ b/monai/data/__init__.py @@ -35,7 +35,6 @@ from .grid_dataset import GridPatchDataset, PatchDataset, PatchIter, PatchIterd from .image_dataset import ImageDataset from .image_reader import ImageReader, ITKReader, NibabelReader, NumpyReader, PILReader -from .wsi_reader import WSIReader, CuCIMWSIReader from .image_writer import ( SUPPORTED_WRITERS, ImageWriter, @@ -88,3 +87,4 @@ worker_init_fn, zoom_affine, ) +from .wsi_reader import CuCIMWSIReader, WSIReader From 025376b59e157c8332cb09a5856f831b58c7336a Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 13 Apr 2022 13:38:06 +0000 Subject: [PATCH 10/26] Clean up imports Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/image_reader.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index 41bf92c2ed..f5d7fdef9d 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -19,8 +19,7 @@ from monai.config import DtypeLike, KeysCollection, PathLike from monai.data.utils import correct_nifti_header_if_necessary, is_supported_format, orientation_ras_lps -from monai.transforms.utility.array import EnsureChannelFirst -from monai.utils import ensure_tuple, ensure_tuple_rep, optional_import, require_pkg +from monai.utils import ensure_tuple, optional_import, require_pkg if TYPE_CHECKING: import itk From 3e6fb27cfbe89db5f508f2aa1d1765982d39cf68 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 13 Apr 2022 20:32:09 +0000 Subject: [PATCH 11/26] Update docstrings Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/__init__.py | 2 +- monai/data/wsi_reader.py | 78 +++++++++++++++++++++++++++++----------- 2 files changed, 59 insertions(+), 21 deletions(-) diff --git a/monai/data/__init__.py b/monai/data/__init__.py index 8544fc2398..b9b3af2968 100644 --- a/monai/data/__init__.py +++ b/monai/data/__init__.py @@ -87,4 +87,4 @@ worker_init_fn, zoom_affine, ) -from .wsi_reader import CuCIMWSIReader, WSIReader +from .wsi_reader import BaseWSIReader, CuCIMWSIReader, WSIReader diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index 94e7c98c3d..e1679f90d9 100644 --- a/monai/data/wsi_reader.py +++ b/monai/data/wsi_reader.py @@ -27,9 +27,9 @@ class BaseWSIReader(ImageReader): """ - An abstract class defines APIs to load whole slide image files. + An abstract class that defines APIs to load patches from whole slide image files. - Typical usage of an implementation of this class is: + Typical usage of a concrete implementation of this class is: .. code-block:: python @@ -37,10 +37,20 @@ class BaseWSIReader(ImageReader): wsi = image_reader.read(path_to_image) img_data, meta_data = image_reader.get_data(wsi) - - The `read` call converts image filenames into image objects, + The following methods are already implemented deligate tasks to other abstract methods (see below): + - The `read` call converts image filenames into whole slide image (wsi) objects. - The `get_data` call fetches the image data, as well as meta data. - - A reader should implement `verify_suffix` with the logic of checking the input filename - by the filename extensions. + - `verify_suffix` verifies + - `_verify_output` verifies the extracted patch to be a two dimensional RGB/RGBA image + + The following methods needs to be implemented for any concrete implementation of this class: + - `_reader` returns a whole slide image reader module that given filename and additional arguments, + returns image object. + - `_get_size` returns the size of the whole slide image of a given wsi object at a given level. + - `_get_level_count` returns the number of levels in the whole slide image + - `_get_patch` extracts and returns a patch image form the whole slide image + - `_get_metadata` extracts and returns metadata for a whole slide image and a specific patch. + """ @@ -55,18 +65,24 @@ def __init__(self, level: int, **kwargs): @property @abstractmethod def _reader(self): + """Returns a whole slide image reader module that given filename and additional arguments, returns image object.""" raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") @abstractmethod def _get_size(self, wsi, level): + """Returns the size of the whole slide image of a given wsi object at a given level.""" raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") @abstractmethod def _get_level_count(self, wsi): + """Returns the number of levels in the whole slide image.""" raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") @abstractmethod - def _get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike): + def _get_patch( + self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str + ): + """Extracts and returns a patch image form the whole slide image.""" raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") @abstractmethod @@ -107,7 +123,7 @@ def get_data( mode: str = "RGB", ): """ - Extract patchs as numpy array from WSI image and return them. + Verifies inputs, extracts patchs from WSI image and generates metadata, and return them. Args: wsi: a whole slide image object loaded from a file @@ -117,6 +133,7 @@ def get_data( This is the size of image at the given level (`level`) level: the level number, or list of level numbers (default=0) dtype: the data type of output image + mode: the output image mode 'RGB' or 'RGBA' Returns: a tuples, where the first element is an image [CxHxW], and second element is a dictionary of metadata @@ -145,7 +162,7 @@ def get_data( raise ValueError(f"Patch size should be greater than zero, provided: patch size = {size}") # Extract a patch or the entire image - patch = self._get_patch(wsi, location=location, size=size, level=level, dtype=dtype) + patch = self._get_patch(wsi, location=location, size=size, level=level, dtype=dtype, mode=mode) # Verify patch image patch = self._verify_output(patch, mode) @@ -193,9 +210,20 @@ def _verify_output(self, patch: np.ndarray, mode: str): class WSIReader(BaseWSIReader): + """ + WSIReader that supports different implemented backends + + The support for any backend can be achieved by + .. code-block:: python + if self.backend == "any_backend": + self.backend_lib = AnyBackendWSIReader(level=level, **kwargs) + + """ + def __init__(self, backend="cucim", level: int = 0, **kwargs): super().__init__(level, **kwargs) self.backend = backend.lower() + # Any new backend can be added below if self.backend == "cucim": self.backend_lib = CuCIMWSIReader(level=level, **kwargs) else: @@ -215,8 +243,10 @@ def _get_size(self, wsi, level): def _get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): return self.backend_lib._get_metadata(wsi=wsi, patch=patch, size=size, location=location, level=level) - def _get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike): - return self.backend_lib._get_patch(wsi=wsi, location=location, size=size, level=level, dtype=dtype) + def _get_patch( + self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str + ): + return self.backend_lib._get_patch(wsi=wsi, location=location, size=size, level=level, dtype=dtype, mode=mode) @require_pkg(pkg_name="cucim") @@ -227,8 +257,8 @@ class CuCIMWSIReader(BaseWSIReader): Args: level: the whole slide image level at which the image is extracted. (default=0) This is overridden if the level argument is provided in `get_data`. - kwargs: additional args for backend reading API in `read()`, more details in `cuCIM`: - https://github.com/rapidsai/cucim/blob/v21.12.00/cpp/include/cucim/cuimage.h#L100. + kwargs: additional args for `cucim.CuImage` module: + https://github.com/rapidsai/cucim/blob/main/cpp/include/cucim/cuimage.h """ @@ -260,14 +290,9 @@ def _get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: } return metadata - def _get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike): - """ - Extract a patch based on given output from the given whole slide image - Args: - - Returns: - a numpy array with dimesion of [3xWxH] - """ + def _get_patch( + self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str + ): # extract a patch (or the entire image) # reverse the order of location and size to become WxH for cuCIM patch = wsi.read_region(location=location[::-1], size=size[::-1], level=level) @@ -278,4 +303,17 @@ def _get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], leve # make it channel first patch = EnsureChannelFirst()(patch, {"original_channel_dim": -1}) + # check if the color channel is 3 (RGB) or 4 (RGBA) + if mode == "RGBA" and patch.shape[0] != 4: + raise ValueError( + f"The image is expected to have four color channels in '{mode}' mode but has {patch.shape[0]}." + ) + + if mode in "RGB": + if patch.shape[0] not in [3, 4]: + raise ValueError( + f"The image is expected to have three or four color channels in '{mode}' mode but has {patch.shape[0]}. " + ) + patch = patch[:3] + return patch From b8a244421930379b2e00550d483ccb14ddb69a4c Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 13 Apr 2022 20:36:33 +0000 Subject: [PATCH 12/26] Update docs and docstrings Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- docs/source/data.rst | 2 +- monai/data/wsi_reader.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/source/data.rst b/docs/source/data.rst index d9c6ed0881..c968d72945 100644 --- a/docs/source/data.rst +++ b/docs/source/data.rst @@ -162,7 +162,7 @@ BaseWSIReader WSIReader ~~~~~~~~~ -.. autoclass:: Reader +.. autoclass:: WSIReader :members: CuCIMWSIReader diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index e1679f90d9..86ec1da8eb 100644 --- a/monai/data/wsi_reader.py +++ b/monai/data/wsi_reader.py @@ -38,12 +38,14 @@ class BaseWSIReader(ImageReader): img_data, meta_data = image_reader.get_data(wsi) The following methods are already implemented deligate tasks to other abstract methods (see below): + - The `read` call converts image filenames into whole slide image (wsi) objects. - The `get_data` call fetches the image data, as well as meta data. - `verify_suffix` verifies - `_verify_output` verifies the extracted patch to be a two dimensional RGB/RGBA image The following methods needs to be implemented for any concrete implementation of this class: + - `_reader` returns a whole slide image reader module that given filename and additional arguments, returns image object. - `_get_size` returns the size of the whole slide image of a given wsi object at a given level. @@ -214,7 +216,9 @@ class WSIReader(BaseWSIReader): WSIReader that supports different implemented backends The support for any backend can be achieved by + .. code-block:: python + if self.backend == "any_backend": self.backend_lib = AnyBackendWSIReader(level=level, **kwargs) From 0268a03b3cbb917ae9a1d4c77df5dd4cc3600513 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 13 Apr 2022 20:37:01 +0000 Subject: [PATCH 13/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- monai/data/wsi_reader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index 86ec1da8eb..5f438f17f1 100644 --- a/monai/data/wsi_reader.py +++ b/monai/data/wsi_reader.py @@ -38,14 +38,14 @@ class BaseWSIReader(ImageReader): img_data, meta_data = image_reader.get_data(wsi) The following methods are already implemented deligate tasks to other abstract methods (see below): - + - The `read` call converts image filenames into whole slide image (wsi) objects. - The `get_data` call fetches the image data, as well as meta data. - `verify_suffix` verifies - `_verify_output` verifies the extracted patch to be a two dimensional RGB/RGBA image The following methods needs to be implemented for any concrete implementation of this class: - + - `_reader` returns a whole slide image reader module that given filename and additional arguments, returns image object. - `_get_size` returns the size of the whole slide image of a given wsi object at a given level. @@ -216,7 +216,7 @@ class WSIReader(BaseWSIReader): WSIReader that supports different implemented backends The support for any backend can be achieved by - + .. code-block:: python if self.backend == "any_backend": From f2b14aaa27647b197a61c58da838564fdf83582d Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 13 Apr 2022 20:46:25 +0000 Subject: [PATCH 14/26] Fix a typo Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index 86ec1da8eb..40cc8f4c87 100644 --- a/monai/data/wsi_reader.py +++ b/monai/data/wsi_reader.py @@ -125,7 +125,7 @@ def get_data( mode: str = "RGB", ): """ - Verifies inputs, extracts patchs from WSI image and generates metadata, and return them. + Verifies inputs, extracts patches from WSI image and generates metadata, and return them. Args: wsi: a whole slide image object loaded from a file From aed0ac596543ba9135211b250fd28569a948c775 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 13 Apr 2022 20:52:27 +0000 Subject: [PATCH 15/26] Remove redundant checking Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_reader.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index 40cc8f4c87..1a14b688c8 100644 --- a/monai/data/wsi_reader.py +++ b/monai/data/wsi_reader.py @@ -186,7 +186,7 @@ def verify_suffix(self, filename: Union[Sequence[PathLike], PathLike]) -> bool: def _verify_output(self, patch: np.ndarray, mode: str): """ - Verify image output + Verify output image patch """ # check if the image has three dimensions (2D + color) if patch.ndim != 3: @@ -195,19 +195,6 @@ def _verify_output(self, patch: np.ndarray, mode: str): "`WSIReader` is designed to work only with 2D images with color channel." ) - # check if the color channel is 3 (RGB) or 4 (RGBA) - if mode == "RGBA" and patch.shape[0] != 4: - raise ValueError( - f"The image is expected to have four color channels in '{mode}' mode but has {patch.shape[0]}." - ) - - if mode in "RGB": - if patch.shape[0] not in [3, 4]: - raise ValueError( - f"The image is expected to have three or four color channels in '{mode}' mode but has {patch.shape[0]}. " - ) - patch = patch[:3] - return patch From ae4ff17f84df7e7841190aac34892ce469594e70 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Thu, 14 Apr 2022 17:34:08 +0000 Subject: [PATCH 16/26] Update read and other methods Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_reader.py | 134 ++++++++++++++++----------------------- 1 file changed, 56 insertions(+), 78 deletions(-) diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index d1933b783d..9deabfebd6 100644 --- a/monai/data/wsi_reader.py +++ b/monai/data/wsi_reader.py @@ -34,24 +34,19 @@ class BaseWSIReader(ImageReader): .. code-block:: python image_reader = MyWSIReader() - wsi = image_reader.read(path_to_image) + wsi = image_reader.read(, **kwargs) img_data, meta_data = image_reader.get_data(wsi) - The following methods are already implemented deligate tasks to other abstract methods (see below): - - - The `read` call converts image filenames into whole slide image (wsi) objects. + - The `read` call converts an image filename into whole slide image object, - The `get_data` call fetches the image data, as well as meta data. - - `verify_suffix` verifies - - `_verify_output` verifies the extracted patch to be a two dimensional RGB/RGBA image The following methods needs to be implemented for any concrete implementation of this class: - - `_reader` returns a whole slide image reader module that given filename and additional arguments, - returns image object. - - `_get_size` returns the size of the whole slide image of a given wsi object at a given level. - - `_get_level_count` returns the number of levels in the whole slide image - - `_get_patch` extracts and returns a patch image form the whole slide image - - `_get_metadata` extracts and returns metadata for a whole slide image and a specific patch. + - `read` reads a whole slide image object from a given file + - `get_size` returns the size of the whole slide image of a given wsi object at a given level. + - `get_level_count` returns the number of levels in the whole slide image + - `get_patch` extracts and returns a patch image form the whole slide image + - `get_metadata` extracts and returns metadata for a whole slide image and a specific patch. """ @@ -64,57 +59,25 @@ def __init__(self, level: int, **kwargs): self.kwargs = kwargs self.metadata: Dict[Any, Any] = {} - @property - @abstractmethod - def _reader(self): - """Returns a whole slide image reader module that given filename and additional arguments, returns image object.""" - raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") - @abstractmethod - def _get_size(self, wsi, level): + def get_size(self, wsi, level): """Returns the size of the whole slide image of a given wsi object at a given level.""" raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") @abstractmethod - def _get_level_count(self, wsi): + def get_level_count(self, wsi): """Returns the number of levels in the whole slide image.""" raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") @abstractmethod - def _get_patch( - self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str - ): + def get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str): """Extracts and returns a patch image form the whole slide image.""" raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") @abstractmethod - def _get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): + def get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") - def read(self, data: Union[Sequence[PathLike], PathLike, np.ndarray], **kwargs): - """ - Read image data from given file or list of files. - - Args: - data: file name or a list of file names to read. - kwargs: additional args for backend reading API in `read()`, will override `self.kwargs` for existing keys. - more details in `cuCIM`: https://github.com/rapidsai/cucim/blob/v21.12.00/cpp/include/cucim/cuimage.h#L100. - - Returns: - image object or list of image objects - - """ - wsi_list: List = [] - - filenames: Sequence[PathLike] = ensure_tuple(data) - kwargs_ = self.kwargs.copy() - kwargs_.update(kwargs) - for filename in filenames: - wsi = self._reader(filename, **kwargs_) - wsi_list.append(wsi) - - return wsi_list if len(filenames) > 1 else wsi_list[0] - def get_data( self, wsi, @@ -143,14 +106,14 @@ def get_data( # Verify magnification level if level is None: level = self.level - max_level = self._get_level_count(wsi) - 1 + max_level = self.get_level_count(wsi) - 1 if level > max_level: raise ValueError(f"The maximum level of this image is {max_level} while level={level} is requested)!") # Verify location if location is None: location = (0, 0) - wsi_size = self._get_size(wsi, level) + wsi_size = self.get_size(wsi, level) if location[0] > wsi_size[0] or location[1] > wsi_size[1]: raise ValueError(f"Location is outside of the image: location={location}, image size={wsi_size}") @@ -158,19 +121,19 @@ def get_data( if size is None: if location != (0, 0): raise ValueError("Patch size should be defined to exctract patches.") - size = self._get_size(wsi, level) + size = self.get_size(wsi, level) else: if size[0] <= 0 or size[1] <= 0: raise ValueError(f"Patch size should be greater than zero, provided: patch size = {size}") # Extract a patch or the entire image - patch = self._get_patch(wsi, location=location, size=size, level=level, dtype=dtype, mode=mode) + patch = self.get_patch(wsi, location=location, size=size, level=level, dtype=dtype, mode=mode) # Verify patch image - patch = self._verify_output(patch, mode) + patch = self.verify_output(patch, mode) # Set patch-related metadata - metadata = self._get_metadata(wsi=wsi, patch=patch, location=location, size=size, level=level) + metadata = self.get_metadata(wsi=wsi, patch=patch, location=location, size=size, level=level) return patch, metadata @@ -184,7 +147,7 @@ def verify_suffix(self, filename: Union[Sequence[PathLike], PathLike]) -> bool: """ return is_supported_format(filename, self.supported_formats) - def _verify_output(self, patch: np.ndarray, mode: str): + def verify_output(self, patch: np.ndarray, mode: str): """ Verify output image patch """ @@ -221,23 +184,20 @@ def __init__(self, backend="cucim", level: int = 0, **kwargs): raise ValueError("The supported backends are: cucim") self.supported_formats = self.backend_lib.supported_formats - @property - def _reader(self): - return self.backend_lib._reader + def get_level_count(self, wsi): + return self.backend_lib.get_level_count(wsi) - def _get_level_count(self, wsi): - return self.backend_lib._get_level_count(wsi) + def get_size(self, wsi, level): + return self.backend_lib.get_size(wsi, level) - def _get_size(self, wsi, level): - return self.backend_lib._get_size(wsi, level) + def get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): + return self.backend_lib.get_metadata(wsi=wsi, patch=patch, size=size, location=location, level=level) - def _get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): - return self.backend_lib._get_metadata(wsi=wsi, patch=patch, size=size, location=location, level=level) + def get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str): + return self.backend_lib.get_patch(wsi=wsi, location=location, size=size, level=level, dtype=dtype, mode=mode) - def _get_patch( - self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str - ): - return self.backend_lib._get_patch(wsi=wsi, location=location, size=size, level=level, dtype=dtype, mode=mode) + def read(self, data: Union[Sequence[PathLike], PathLike, np.ndarray], **kwargs): + return self.backend_lib.read(data=data, **kwargs) @require_pkg(pkg_name="cucim") @@ -258,19 +218,15 @@ class CuCIMWSIReader(BaseWSIReader): def __init__(self, level: int = 0, **kwargs): super().__init__(level, **kwargs) - @property - def _reader(self): - return CuImage - @staticmethod - def _get_level_count(wsi): + def get_level_count(wsi): return wsi.resolutions["level_count"] @staticmethod - def _get_size(wsi, level): + def get_size(wsi, level): return wsi.resolutions["level_dimensions"][level][::-1] - def _get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): + def get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): metadata: Dict = { "backend": "cucim", "spatial_shape": np.asarray(patch.shape[1:]), @@ -281,9 +237,31 @@ def _get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: } return metadata - def _get_patch( - self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str - ): + def read(self, data: Union[Sequence[PathLike], PathLike, np.ndarray], **kwargs): + """ + Read whole slide image objects from given file or list of files. + + Args: + data: file name or a list of file names to read. + kwargs: additional args that overrides `self.kwargs` for existing keys. + For more details look at https://github.com/rapidsai/cucim/blob/main/cpp/include/cucim/cuimage.h + + Returns: + whole slide image object or list of such objects + + """ + wsi_list: List = [] + + filenames: Sequence[PathLike] = ensure_tuple(data) + kwargs_ = self.kwargs.copy() + kwargs_.update(kwargs) + for filename in filenames: + wsi = CuImage(filename, **kwargs_) + wsi_list.append(wsi) + + return wsi_list if len(filenames) > 1 else wsi_list[0] + + def get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str): # extract a patch (or the entire image) # reverse the order of location and size to become WxH for cuCIM patch = wsi.read_region(location=location[::-1], size=size[::-1], level=level) From e5a7a1875114cfd18351d83b6b07a64d68665fd4 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Mon, 18 Apr 2022 14:43:58 +0000 Subject: [PATCH 17/26] Update wsireader to support multi image and update docstrings Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_reader.py | 267 +++++++++++++++++++++++++++++---------- 1 file changed, 202 insertions(+), 65 deletions(-) diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index 9deabfebd6..4ccd40a1c5 100644 --- a/monai/data/wsi_reader.py +++ b/monai/data/wsi_reader.py @@ -15,7 +15,7 @@ import numpy as np from monai.config import DtypeLike, PathLike -from monai.data.image_reader import ImageReader +from monai.data.image_reader import ImageReader, _stack_images from monai.data.utils import is_supported_format from monai.transforms.utility.array import EnsureChannelFirst from monai.utils import ensure_tuple, optional_import, require_pkg @@ -60,22 +60,58 @@ def __init__(self, level: int, **kwargs): self.metadata: Dict[Any, Any] = {} @abstractmethod - def get_size(self, wsi, level): - """Returns the size of the whole slide image of a given wsi object at a given level.""" + def get_size(self, wsi, level) -> Tuple[int, int]: + """ + Returns the size of the whole slide image at a given level. + + Args: + wsi: a whole slide image object loaded from a file + level: the level number where the size is calculated + + """ raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") @abstractmethod def get_level_count(self, wsi): - """Returns the number of levels in the whole slide image.""" + """ + Returns the number of levels in the whole slide image. + + Args: + wsi: a whole slide image object loaded from a file + + """ raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") @abstractmethod def get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str): - """Extracts and returns a patch image form the whole slide image.""" + """ + Extracts and returns a patch image form the whole slide image. + + Args: + wsi: a whole slide image object loaded from a file or a lis of such objects + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). + size: (height, width) tuple giving the patch size at the given level (`level`). + If None, it is set to the full image size at the given level. + level: the level number. Defaults to 0 + dtype: the data type of output image + mode: the output image mode, 'RGB' or 'RGBA' + + """ raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") @abstractmethod - def get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): + def get_metadata(self, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): + """ + Extracts and returns metadata form the whole slide image. + + Args: + patch: extracted patch from whole slide image + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). + size: (height, width) tuple giving the patch size at the given level (`level`). + If None, it is set to the full image size at the given level. + level: the level number. Defaults to 0 + + """ raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") def get_data( @@ -91,65 +127,77 @@ def get_data( Verifies inputs, extracts patches from WSI image and generates metadata, and return them. Args: - wsi: a whole slide image object loaded from a file - location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame, - or list of tuples (default=(0, 0)) - size: (height, width) tuple giving the patch size, or list of tuples (default to full image size) - This is the size of image at the given level (`level`) - level: the level number, or list of level numbers (default=0) + wsi: a whole slide image object loaded from a file or a lis of such objects + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). + size: (height, width) tuple giving the patch size at the given level (`level`). + If None, it is set to the full image size at the given level. + level: the level number. Defaults to 0 dtype: the data type of output image - mode: the output image mode 'RGB' or 'RGBA' + mode: the output image mode, 'RGB' or 'RGBA' Returns: - a tuples, where the first element is an image [CxHxW], and second element is a dictionary of metadata - """ - # Verify magnification level - if level is None: - level = self.level - max_level = self.get_level_count(wsi) - 1 - if level > max_level: - raise ValueError(f"The maximum level of this image is {max_level} while level={level} is requested)!") - - # Verify location - if location is None: - location = (0, 0) - wsi_size = self.get_size(wsi, level) - if location[0] > wsi_size[0] or location[1] > wsi_size[1]: - raise ValueError(f"Location is outside of the image: location={location}, image size={wsi_size}") - - # Verify size - if size is None: - if location != (0, 0): - raise ValueError("Patch size should be defined to exctract patches.") - size = self.get_size(wsi, level) - else: - if size[0] <= 0 or size[1] <= 0: - raise ValueError(f"Patch size should be greater than zero, provided: patch size = {size}") - - # Extract a patch or the entire image - patch = self.get_patch(wsi, location=location, size=size, level=level, dtype=dtype, mode=mode) - - # Verify patch image - patch = self.verify_output(patch, mode) - - # Set patch-related metadata - metadata = self.get_metadata(wsi=wsi, patch=patch, location=location, size=size, level=level) - - return patch, metadata + a tuples, where the first element is an image patch [CxHxW] or stack of patches, + and second element is a dictionary of metadata + """ + patch_list: List = [] + metadata = {} + for each_wsi in ensure_tuple(wsi): + # Verify magnification level + if level is None: + level = self.level + max_level = self.get_level_count(each_wsi) - 1 + if level > max_level: + raise ValueError(f"The maximum level of this image is {max_level} while level={level} is requested)!") + + # Verify location + if location is None: + location = (0, 0) + wsi_size = self.get_size(each_wsi, level) + if location[0] > wsi_size[0] or location[1] > wsi_size[1]: + raise ValueError(f"Location is outside of the image: location={location}, image size={wsi_size}") + + # Verify size + if size is None: + if location != (0, 0): + raise ValueError("Patch size should be defined to exctract patches.") + size = self.get_size(each_wsi, level) + else: + if size[0] <= 0 or size[1] <= 0: + raise ValueError(f"Patch size should be greater than zero, provided: patch size = {size}") + + # Extract a patch or the entire image + patch = self.get_patch(each_wsi, location=location, size=size, level=level, dtype=dtype, mode=mode) + + # Verify patch image + patch = self.verify_output(patch, mode) + + # Create a list of patches + patch_list.append(patch) + + # Set patch-related metadata + each_meta = self.get_metadata(patch=patch, location=location, size=size, level=level) + metadata.update(each_meta) + + return _stack_images(patch_list, metadata), metadata def verify_suffix(self, filename: Union[Sequence[PathLike], PathLike]) -> bool: """ Verify whether the specified file or files format is supported by WSI reader. + The list of supported suffixes are read from `self.supported_formats`. + Args: - filename: file name or a list of file names to read. - if a list of files, verify all the suffixes. + filename: filename or a list of filenames to read. + """ return is_supported_format(filename, self.supported_formats) def verify_output(self, patch: np.ndarray, mode: str): """ - Verify output image patch + Verify output image patch to have consistent outputs + + Args: + patch: extracted patch from the whole slide image """ # check if the image has three dimensions (2D + color) if patch.ndim != 3: @@ -170,7 +218,7 @@ class WSIReader(BaseWSIReader): .. code-block:: python if self.backend == "any_backend": - self.backend_lib = AnyBackendWSIReader(level=level, **kwargs) + self.reader = AnyBackendWSIReader(level=level, **kwargs) """ @@ -179,25 +227,75 @@ def __init__(self, backend="cucim", level: int = 0, **kwargs): self.backend = backend.lower() # Any new backend can be added below if self.backend == "cucim": - self.backend_lib = CuCIMWSIReader(level=level, **kwargs) + self.reader = CuCIMWSIReader(level=level, **kwargs) else: raise ValueError("The supported backends are: cucim") - self.supported_formats = self.backend_lib.supported_formats + self.supported_formats = self.reader.supported_formats def get_level_count(self, wsi): - return self.backend_lib.get_level_count(wsi) + """ + Returns the number of levels in the whole slide image. + + Args: + wsi: a whole slide image object loaded from a file + + """ + return self.reader.get_level_count(wsi) + + def get_size(self, wsi, level) -> Tuple[int, int]: + """ + Returns the size of the whole slide image at a given level. + + Args: + wsi: a whole slide image object loaded from a file + level: the level number where the size is calculated - def get_size(self, wsi, level): - return self.backend_lib.get_size(wsi, level) + """ + return self.reader.get_size(wsi, level) + + def get_metadata(self, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): + """ + Extracts and returns metadata form the whole slide image. - def get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): - return self.backend_lib.get_metadata(wsi=wsi, patch=patch, size=size, location=location, level=level) + Args: + patch: extracted patch from whole slide image + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). + size: (height, width) tuple giving the patch size at the given level (`level`). + If None, it is set to the full image size at the given level. + level: the level number. Defaults to 0 + + """ + return self.reader.get_metadata(patch=patch, size=size, location=location, level=level) def get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str): - return self.backend_lib.get_patch(wsi=wsi, location=location, size=size, level=level, dtype=dtype, mode=mode) + """ + Extracts and returns a patch image form the whole slide image. + + Args: + wsi: a whole slide image object loaded from a file or a lis of such objects + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). + size: (height, width) tuple giving the patch size at the given level (`level`). + If None, it is set to the full image size at the given level. + level: the level number. Defaults to 0 + dtype: the data type of output image + mode: the output image mode, 'RGB' or 'RGBA' + + """ + return self.reader.get_patch(wsi=wsi, location=location, size=size, level=level, dtype=dtype, mode=mode) def read(self, data: Union[Sequence[PathLike], PathLike, np.ndarray], **kwargs): - return self.backend_lib.read(data=data, **kwargs) + """ + Read whole slide image objects from given file or list of files. + + Args: + data: file name or a list of file names to read. + kwargs: additional args for the reader module (overrides `self.kwargs` for existing keys). + + Returns: + whole slide image object or list of such objects + + """ + return self.reader.read(data=data, **kwargs) @require_pkg(pkg_name="cucim") @@ -220,17 +318,43 @@ def __init__(self, level: int = 0, **kwargs): @staticmethod def get_level_count(wsi): + """ + Returns the number of levels in the whole slide image. + + Args: + wsi: a whole slide image object loaded from a file + + """ return wsi.resolutions["level_count"] @staticmethod - def get_size(wsi, level): - return wsi.resolutions["level_dimensions"][level][::-1] + def get_size(wsi, level) -> Tuple[int, int]: + """ + Returns the size of the whole slide image at a given level. + + Args: + wsi: a whole slide image object loaded from a file + level: the level number where the size is calculated + + """ + return (wsi.resolutions["level_dimensions"][level][1], wsi.resolutions["level_dimensions"][level][0]) - def get_metadata(self, wsi, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): + def get_metadata(self, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): + """ + Extracts and returns metadata form the whole slide image. + + Args: + patch: extracted patch from whole slide image + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). + size: (height, width) tuple giving the patch size at the given level (`level`). + If None, it is set to the full image size at the given level. + level: the level number. Defaults to 0 + + """ metadata: Dict = { "backend": "cucim", "spatial_shape": np.asarray(patch.shape[1:]), - "original_channel_dim": -1, + "original_channel_dim": 0, "location": location, "size": size, "level": level, @@ -262,6 +386,19 @@ def read(self, data: Union[Sequence[PathLike], PathLike, np.ndarray], **kwargs): return wsi_list if len(filenames) > 1 else wsi_list[0] def get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str): + """ + Extracts and returns a patch image form the whole slide image. + + Args: + wsi: a whole slide image object loaded from a file or a lis of such objects + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). + size: (height, width) tuple giving the patch size at the given level (`level`). + If None, it is set to the full image size at the given level. + level: the level number. Defaults to 0 + dtype: the data type of output image + mode: the output image mode, 'RGB' or 'RGBA' + + """ # extract a patch (or the entire image) # reverse the order of location and size to become WxH for cuCIM patch = wsi.read_region(location=location[::-1], size=size[::-1], level=level) From da6b675924a837034103227b45c7275c154f8aa9 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Mon, 18 Apr 2022 14:59:41 +0000 Subject: [PATCH 18/26] Make workaround for CuImage objects Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_reader.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index 4ccd40a1c5..7dba0033ab 100644 --- a/monai/data/wsi_reader.py +++ b/monai/data/wsi_reader.py @@ -141,6 +141,9 @@ def get_data( """ patch_list: List = [] metadata = {} + # A temporary workaround for a CuImage object being iterable: https://github.com/rapidsai/cucim/issues/264 + if not isinstance(wsi, List): + wsi = [wsi] for each_wsi in ensure_tuple(wsi): # Verify magnification level if level is None: @@ -399,17 +402,17 @@ def get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level mode: the output image mode, 'RGB' or 'RGBA' """ - # extract a patch (or the entire image) - # reverse the order of location and size to become WxH for cuCIM + # Extract a patch or the entire image + # (reverse the order of location and size to become WxH for cuCIM) patch = wsi.read_region(location=location[::-1], size=size[::-1], level=level) - # convert to numpy + # Convert to numpy patch = np.asarray(patch, dtype=dtype) - # make it channel first + # Make it channel first patch = EnsureChannelFirst()(patch, {"original_channel_dim": -1}) - # check if the color channel is 3 (RGB) or 4 (RGBA) + # Check if the color channel is 3 (RGB) or 4 (RGBA) if mode == "RGBA" and patch.shape[0] != 4: raise ValueError( f"The image is expected to have four color channels in '{mode}' mode but has {patch.shape[0]}." From 1ed8154cbbadcbc7aec6a8ed620dacf94ff57c97 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Mon, 18 Apr 2022 15:00:18 +0000 Subject: [PATCH 19/26] Add unittests for multi image reading Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_wsireader.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/test_wsireader.py b/tests/test_wsireader.py index 26f175b7ae..7b288f6040 100644 --- a/tests/test_wsireader.py +++ b/tests/test_wsireader.py @@ -56,6 +56,17 @@ np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), ] +TEST_CASE_3 = [ + [FILE_PATH, FILE_PATH], + {"location": (0, 0), "size": (2, 1), "level": 2}, + np.concatenate( + [ + np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), + np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), + ], + axis=0, + ), +] TEST_CASE_RGB_0 = [np.ones((3, 2, 2), dtype=np.uint8)] # CHW @@ -132,6 +143,23 @@ def test_read_region(self, file_path, patch_info, expected_img): self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) + @parameterized.expand([TEST_CASE_3]) + def test_read_region_multi_wsi(self, file_path, patch_info, expected_img): + kwargs = {"name": None, "offset": None} if self.backend == "tifffile" else {} + reader = WSIReader(self.backend, **kwargs) + img_obj = reader.read(file_path, **kwargs) + if self.backend == "tifffile": + with self.assertRaises(ValueError): + reader.get_data(img_obj, **patch_info)[0] + else: + # Read twice to check multiple calls + img = reader.get_data(img_obj, **patch_info)[0] + img2 = reader.get_data(img_obj, **patch_info)[0] + self.assertTupleEqual(img.shape, img2.shape) + self.assertIsNone(assert_array_equal(img, img2)) + self.assertTupleEqual(img.shape, expected_img.shape) + self.assertIsNone(assert_array_equal(img, expected_img)) + @parameterized.expand([TEST_CASE_RGB_0, TEST_CASE_RGB_1]) @skipUnless(has_tiff, "Requires tifffile.") def test_read_rgba(self, img_expected): From 77f87477102755b1718ac1da583b764b5e79fa54 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Mon, 18 Apr 2022 18:59:58 +0000 Subject: [PATCH 20/26] Update a note about cucim Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index 7dba0033ab..6111dca3c4 100644 --- a/monai/data/wsi_reader.py +++ b/monai/data/wsi_reader.py @@ -141,7 +141,7 @@ def get_data( """ patch_list: List = [] metadata = {} - # A temporary workaround for a CuImage object being iterable: https://github.com/rapidsai/cucim/issues/264 + # CuImage object is iterable, so ensure_tuple won't work on single object if not isinstance(wsi, List): wsi = [wsi] for each_wsi in ensure_tuple(wsi): From 36e5b170a841d51cfb78d7b70f75932074868a63 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 19 Apr 2022 13:52:51 +0000 Subject: [PATCH 21/26] Update type hints and docstrings Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_reader.py | 82 ++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 45 deletions(-) diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py index 6111dca3c4..4899fb8830 100644 --- a/monai/data/wsi_reader.py +++ b/monai/data/wsi_reader.py @@ -51,7 +51,7 @@ class BaseWSIReader(ImageReader): """ - supported_formats: List[str] = [] + supported_suffixes: List[str] = [] def __init__(self, level: int, **kwargs): super().__init__() @@ -60,7 +60,7 @@ def __init__(self, level: int, **kwargs): self.metadata: Dict[Any, Any] = {} @abstractmethod - def get_size(self, wsi, level) -> Tuple[int, int]: + def get_size(self, wsi, level: int) -> Tuple[int, int]: """ Returns the size of the whole slide image at a given level. @@ -72,7 +72,7 @@ def get_size(self, wsi, level) -> Tuple[int, int]: raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") @abstractmethod - def get_level_count(self, wsi): + def get_level_count(self, wsi) -> int: """ Returns the number of levels in the whole slide image. @@ -83,7 +83,9 @@ def get_level_count(self, wsi): raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") @abstractmethod - def get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str): + def get_patch( + self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str + ) -> np.ndarray: """ Extracts and returns a patch image form the whole slide image. @@ -100,7 +102,7 @@ def get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") @abstractmethod - def get_metadata(self, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): + def get_metadata(self, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int) -> Dict: """ Extracts and returns metadata form the whole slide image. @@ -122,12 +124,12 @@ def get_data( level: Optional[int] = None, dtype: DtypeLike = np.uint8, mode: str = "RGB", - ): + ) -> Tuple[np.ndarray, Dict]: """ Verifies inputs, extracts patches from WSI image and generates metadata, and return them. Args: - wsi: a whole slide image object loaded from a file or a lis of such objects + wsi: a whole slide image object loaded from a file or a list of such objects location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). size: (height, width) tuple giving the patch size at the given level (`level`). If None, it is set to the full image size at the given level. @@ -171,8 +173,12 @@ def get_data( # Extract a patch or the entire image patch = self.get_patch(each_wsi, location=location, size=size, level=level, dtype=dtype, mode=mode) - # Verify patch image - patch = self.verify_output(patch, mode) + # check if the image has three dimensions (2D + color) + if patch.ndim != 3: + raise ValueError( + f"The image dimension should be 3 but has {patch.ndim}. " + "`WSIReader` is designed to work only with 2D images with color channel." + ) # Create a list of patches patch_list.append(patch) @@ -187,41 +193,23 @@ def verify_suffix(self, filename: Union[Sequence[PathLike], PathLike]) -> bool: """ Verify whether the specified file or files format is supported by WSI reader. - The list of supported suffixes are read from `self.supported_formats`. + The list of supported suffixes are read from `self.supported_suffixes`. Args: filename: filename or a list of filenames to read. """ - return is_supported_format(filename, self.supported_formats) - - def verify_output(self, patch: np.ndarray, mode: str): - """ - Verify output image patch to have consistent outputs - - Args: - patch: extracted patch from the whole slide image - """ - # check if the image has three dimensions (2D + color) - if patch.ndim != 3: - raise ValueError( - f"The image dimension should be 3 but has {patch.ndim}. " - "`WSIReader` is designed to work only with 2D images with color channel." - ) - - return patch + return is_supported_format(filename, self.supported_suffixes) class WSIReader(BaseWSIReader): """ - WSIReader that supports different implemented backends + Read whole slide images and extract patches using different backend libraries - The support for any backend can be achieved by - - .. code-block:: python - - if self.backend == "any_backend": - self.reader = AnyBackendWSIReader(level=level, **kwargs) + Args: + backend: the name of backend whole slide image reader library, the default is cuCIM. + level: the level at which patches are extracted. + kwargs: additional arguments to be passed to the backend library """ @@ -233,9 +221,9 @@ def __init__(self, backend="cucim", level: int = 0, **kwargs): self.reader = CuCIMWSIReader(level=level, **kwargs) else: raise ValueError("The supported backends are: cucim") - self.supported_formats = self.reader.supported_formats + self.supported_suffixes = self.reader.supported_suffixes - def get_level_count(self, wsi): + def get_level_count(self, wsi) -> int: """ Returns the number of levels in the whole slide image. @@ -256,7 +244,7 @@ def get_size(self, wsi, level) -> Tuple[int, int]: """ return self.reader.get_size(wsi, level) - def get_metadata(self, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): + def get_metadata(self, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int) -> Dict: """ Extracts and returns metadata form the whole slide image. @@ -270,7 +258,9 @@ def get_metadata(self, patch: np.ndarray, location: Tuple[int, int], size: Tuple """ return self.reader.get_metadata(patch=patch, size=size, location=location, level=level) - def get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str): + def get_patch( + self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str + ) -> np.ndarray: """ Extracts and returns a patch image form the whole slide image. @@ -314,13 +304,13 @@ class CuCIMWSIReader(BaseWSIReader): """ - supported_formats = ["tif", "tiff", "svs"] + supported_suffixes = ["tif", "tiff", "svs"] def __init__(self, level: int = 0, **kwargs): super().__init__(level, **kwargs) @staticmethod - def get_level_count(wsi): + def get_level_count(wsi) -> int: """ Returns the number of levels in the whole slide image. @@ -328,7 +318,7 @@ def get_level_count(wsi): wsi: a whole slide image object loaded from a file """ - return wsi.resolutions["level_count"] + return wsi.resolutions["level_count"] # type: ignore @staticmethod def get_size(wsi, level) -> Tuple[int, int]: @@ -342,7 +332,7 @@ def get_size(wsi, level) -> Tuple[int, int]: """ return (wsi.resolutions["level_dimensions"][level][1], wsi.resolutions["level_dimensions"][level][0]) - def get_metadata(self, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int): + def get_metadata(self, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int) -> Dict: """ Extracts and returns metadata form the whole slide image. @@ -388,7 +378,9 @@ def read(self, data: Union[Sequence[PathLike], PathLike, np.ndarray], **kwargs): return wsi_list if len(filenames) > 1 else wsi_list[0] - def get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str): + def get_patch( + self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str + ) -> np.ndarray: """ Extracts and returns a patch image form the whole slide image. @@ -404,13 +396,13 @@ def get_patch(self, wsi, location: Tuple[int, int], size: Tuple[int, int], level """ # Extract a patch or the entire image # (reverse the order of location and size to become WxH for cuCIM) - patch = wsi.read_region(location=location[::-1], size=size[::-1], level=level) + patch: np.ndarray = wsi.read_region(location=location[::-1], size=size[::-1], level=level) # Convert to numpy patch = np.asarray(patch, dtype=dtype) # Make it channel first - patch = EnsureChannelFirst()(patch, {"original_channel_dim": -1}) + patch = EnsureChannelFirst()(patch, {"original_channel_dim": -1}) # type: ignore # Check if the color channel is 3 (RGB) or 4 (RGBA) if mode == "RGBA" and patch.shape[0] != 4: From 78340a91543b315dc5671b2c686fab66fb612a66 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 19 Apr 2022 10:28:41 -0400 Subject: [PATCH 22/26] Redesign whole slide image reading (#4107) * Redesign BaseWSIReader, WSIReader, CuCIMWSIReader Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Add unittests for WSIReader Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Add image mode for output validation Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Update docs Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Update references to new WSIReader Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Remove legacy WSIReader Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Update unittests Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Update docs Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * sort imports Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Clean up imports Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Update docstrings Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Update docs and docstrings Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix a typo Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Remove redundant checking Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Update read and other methods Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Update wsireader to support multi image and update docstrings Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Make workaround for CuImage objects Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Add unittests for multi image reading Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Update a note about cucim Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> * Update type hints and docstrings Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- docs/source/data.rst | 13 + monai/apps/pathology/data/datasets.py | 2 +- monai/apps/pathology/metrics/lesion_froc.py | 2 +- monai/data/__init__.py | 3 +- monai/data/image_reader.py | 267 +------------ monai/data/wsi_reader.py | 420 ++++++++++++++++++++ tests/test_wsireader.py | 71 ++-- 7 files changed, 464 insertions(+), 314 deletions(-) create mode 100644 monai/data/wsi_reader.py diff --git a/docs/source/data.rst b/docs/source/data.rst index 0910001783..c968d72945 100644 --- a/docs/source/data.rst +++ b/docs/source/data.rst @@ -152,11 +152,24 @@ PILReader .. autoclass:: PILReader :members: +Whole slide image reader +------------------------ + +BaseWSIReader +~~~~~~~~~~~~~ +.. autoclass:: BaseWSIReader + :members: + WSIReader ~~~~~~~~~ .. autoclass:: WSIReader :members: +CuCIMWSIReader +~~~~~~~~~~~~~~ +.. autoclass:: CuCIMWSIReader + :members: + Image writer ------------ diff --git a/monai/apps/pathology/data/datasets.py b/monai/apps/pathology/data/datasets.py index 71f3214ea4..756223a784 100644 --- a/monai/apps/pathology/data/datasets.py +++ b/monai/apps/pathology/data/datasets.py @@ -16,7 +16,7 @@ import numpy as np from monai.data import Dataset, SmartCacheDataset -from monai.data.image_reader import WSIReader +from monai.data.wsi_reader import WSIReader from monai.utils import ensure_tuple_rep __all__ = ["PatchWSIDataset", "SmartCachePatchWSIDataset", "MaskedInferenceWSIDataset"] diff --git a/monai/apps/pathology/metrics/lesion_froc.py b/monai/apps/pathology/metrics/lesion_froc.py index 6073bd0cda..e48f2128fe 100644 --- a/monai/apps/pathology/metrics/lesion_froc.py +++ b/monai/apps/pathology/metrics/lesion_froc.py @@ -14,7 +14,7 @@ import numpy as np from monai.apps.pathology.utils import PathologyProbNMS, compute_isolated_tumor_cells, compute_multi_instance_mask -from monai.data.image_reader import WSIReader +from monai.data.wsi_reader import WSIReader from monai.metrics import compute_fp_tp_probs, compute_froc_curve_data, compute_froc_score from monai.utils import min_version, optional_import diff --git a/monai/data/__init__.py b/monai/data/__init__.py index 19ca29eafa..ca4be87ef6 100644 --- a/monai/data/__init__.py +++ b/monai/data/__init__.py @@ -34,7 +34,7 @@ from .folder_layout import FolderLayout from .grid_dataset import GridPatchDataset, PatchDataset, PatchIter, PatchIterd from .image_dataset import ImageDataset -from .image_reader import ImageReader, ITKReader, NibabelReader, NumpyReader, PILReader, WSIReader +from .image_reader import ImageReader, ITKReader, NibabelReader, NumpyReader, PILReader from .image_writer import ( SUPPORTED_WRITERS, ImageWriter, @@ -87,3 +87,4 @@ worker_init_fn, zoom_affine, ) +from .wsi_reader import BaseWSIReader, CuCIMWSIReader, WSIReader diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index ca77178e0b..f5d7fdef9d 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -19,8 +19,7 @@ from monai.config import DtypeLike, KeysCollection, PathLike from monai.data.utils import correct_nifti_header_if_necessary, is_supported_format, orientation_ras_lps -from monai.transforms.utility.array import EnsureChannelFirst -from monai.utils import ensure_tuple, ensure_tuple_rep, optional_import, require_pkg +from monai.utils import ensure_tuple, optional_import, require_pkg if TYPE_CHECKING: import itk @@ -39,7 +38,7 @@ CuImage, _ = optional_import("cucim", name="CuImage") TiffFile, _ = optional_import("tifffile", name="TiffFile") -__all__ = ["ImageReader", "ITKReader", "NibabelReader", "NumpyReader", "PILReader", "WSIReader"] +__all__ = ["ImageReader", "ITKReader", "NibabelReader", "NumpyReader", "PILReader"] class ImageReader(ABC): @@ -714,265 +713,3 @@ def _get_spatial_shape(self, img): img: a PIL Image object loaded from an image file. """ return np.asarray((img.width, img.height)) - - -class WSIReader(ImageReader): - """ - Read whole slide images and extract patches. - - Args: - backend: backend library to load the images, available options: "cuCIM", "OpenSlide" and "TiffFile". - level: the whole slide image level at which the image is extracted. (default=0) - This is overridden if the level argument is provided in `get_data`. - kwargs: additional args for backend reading API in `read()`, more details in `cuCIM`, `TiffFile`, `OpenSlide`: - https://github.com/rapidsai/cucim/blob/v21.12.00/cpp/include/cucim/cuimage.h#L100. - https://github.com/cgohlke/tifffile. - https://openslide.org/api/python/#openslide.OpenSlide. - - Note: - While "cuCIM" and "OpenSlide" backends both can load patches from large whole slide images - without loading the entire image into memory, "TiffFile" backend needs to load the entire image into memory - before extracting any patch; thus, memory consideration is needed when using "TiffFile" backend for - patch extraction. - - """ - - def __init__(self, backend: str = "OpenSlide", level: int = 0, **kwargs): - super().__init__() - self.backend = backend.lower() - func = require_pkg(self.backend)(self._set_reader) - self.wsi_reader = func(self.backend) - self.level = level - self.kwargs = kwargs - - @staticmethod - def _set_reader(backend: str): - if backend == "openslide": - return OpenSlide - if backend == "cucim": - return CuImage - if backend == "tifffile": - return TiffFile - raise ValueError("`backend` should be 'cuCIM', 'OpenSlide' or 'TiffFile'.") - - def verify_suffix(self, filename: Union[Sequence[PathLike], PathLike]) -> bool: - """ - Verify whether the specified file or files format is supported by WSI reader. - - Args: - filename: file name or a list of file names to read. - if a list of files, verify all the suffixes. - """ - return is_supported_format(filename, ["tif", "tiff"]) - - def read(self, data: Union[Sequence[PathLike], PathLike, np.ndarray], **kwargs): - """ - Read image data from given file or list of files. - - Args: - data: file name or a list of file names to read. - kwargs: additional args for backend reading API in `read()`, will override `self.kwargs` for existing keys. - more details in `cuCIM`, `TiffFile`, `OpenSlide`: - https://github.com/rapidsai/cucim/blob/v21.12.00/cpp/include/cucim/cuimage.h#L100. - https://github.com/cgohlke/tifffile. - https://openslide.org/api/python/#openslide.OpenSlide. - - Returns: - image object or list of image objects - - """ - img_: List = [] - - filenames: Sequence[PathLike] = ensure_tuple(data) - kwargs_ = self.kwargs.copy() - kwargs_.update(kwargs) - for name in filenames: - img = self.wsi_reader(name, **kwargs_) - if self.backend == "openslide": - img.shape = (img.dimensions[1], img.dimensions[0], 3) - img_.append(img) - - return img_ if len(filenames) > 1 else img_[0] - - def get_data( - self, - img, - location: Tuple[int, int] = (0, 0), - size: Optional[Tuple[int, int]] = None, - level: Optional[int] = None, - dtype: DtypeLike = np.uint8, - grid_shape: Tuple[int, int] = (1, 1), - patch_size: Optional[Union[int, Tuple[int, int]]] = None, - ): - """ - Extract regions as numpy array from WSI image and return them. - - Args: - img: a WSIReader image object loaded from a file, or list of CuImage objects - location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame, - or list of tuples (default=(0, 0)) - size: (height, width) tuple giving the region size, or list of tuples (default to full image size) - This is the size of image at the given level (`level`) - level: the level number, or list of level numbers (default=0) - dtype: the data type of output image - grid_shape: (row, columns) tuple define a grid to extract patches on that - patch_size: (height, width) the size of extracted patches at the given level - """ - # Verify inputs - if level is None: - level = self.level - max_level = self._get_max_level(img) - if level > max_level: - raise ValueError(f"The maximum level of this image is {max_level} while level={level} is requested)!") - - # Extract a region or the entire image - region = self._extract_region(img, location=location, size=size, level=level, dtype=dtype) - - # Add necessary metadata - metadata: Dict = {} - metadata["spatial_shape"] = np.asarray(region.shape[:-1]) - metadata["original_channel_dim"] = -1 - - # Make it channel first - region = EnsureChannelFirst()(region, metadata) - - # Split into patches - if patch_size is None: - patches = region - else: - tuple_patch_size = ensure_tuple_rep(patch_size, 2) - patches = self._extract_patches( - region, patch_size=tuple_patch_size, grid_shape=grid_shape, dtype=dtype # type: ignore - ) - - return patches, metadata - - def _get_max_level(self, img_obj): - """ - Return the maximum number of levels in the whole slide image - Args: - img: the whole slide image object - - """ - if self.backend == "openslide": - return img_obj.level_count - 1 - if self.backend == "cucim": - return img_obj.resolutions["level_count"] - 1 - if self.backend == "tifffile": - return len(img_obj.pages) - 1 - - def _get_image_size(self, img, size, level, location): - """ - Calculate the maximum region size for the given level and starting location (if size is None). - Note that region size in OpenSlide and cuCIM are WxH (but the final image output would be HxW) - """ - if size is not None: - return size[::-1] - - max_size = [] - downsampling_factor = [] - if self.backend == "openslide": - downsampling_factor = img.level_downsamples[level] - max_size = img.level_dimensions[level] - elif self.backend == "cucim": - downsampling_factor = img.resolutions["level_downsamples"][level] - max_size = img.resolutions["level_dimensions"][level] - - # subtract the top left corner of the patch (at given level) from maximum size - location_at_level = (round(location[1] / downsampling_factor), round(location[0] / downsampling_factor)) - size = [max_size[i] - location_at_level[i] for i in range(len(max_size))] - - return size - - def _extract_region( - self, - img_obj, - size: Optional[Tuple[int, int]], - location: Tuple[int, int] = (0, 0), - level: int = 0, - dtype: DtypeLike = np.uint8, - ): - if self.backend == "tifffile": - # Read the entire image - if size is not None: - raise ValueError( - f"TiffFile backend reads the entire image only, so size '{size}'' should not be provided!", - "For more flexibility or extracting regions, please use cuCIM or OpenSlide backend.", - ) - if location != (0, 0): - raise ValueError( - f"TiffFile backend reads the entire image only, so location '{location}' should not be provided!", - "For more flexibility and extracting regions, please use cuCIM or OpenSlide backend.", - ) - region = img_obj.asarray(level=level) - else: - # Get region size to be extracted - region_size = self._get_image_size(img_obj, size, level, location) - # reverse the order of location's dimensions to become WxH (for cuCIM and OpenSlide) - region_location = location[::-1] - # Extract a region (or the entire image) - region = img_obj.read_region(location=region_location, size=region_size, level=level) - - region = self.convert_to_rgb_array(region, dtype) - return region - - def convert_to_rgb_array(self, raw_region, dtype: DtypeLike = np.uint8): - """Convert to RGB mode and numpy array""" - if self.backend == "openslide": - # convert to RGB - raw_region = raw_region.convert("RGB") - - # convert to numpy (if not already in numpy) - raw_region = np.asarray(raw_region, dtype=dtype) - - # check if the image has three dimensions (2D + color) - if raw_region.ndim != 3: - raise ValueError( - f"The input image dimension should be 3 but {raw_region.ndim} is given. " - "`WSIReader` is designed to work only with 2D colored images." - ) - - # check if the color channel is 3 (RGB) or 4 (RGBA) - if raw_region.shape[-1] not in [3, 4]: - raise ValueError( - f"There should be three or four color channels but {raw_region.shape[-1]} is given. " - "`WSIReader` is designed to work only with 2D colored images." - ) - - # remove alpha channel if exist (RGBA) - if raw_region.shape[-1] > 3: - raw_region = raw_region[..., :3] - - return raw_region - - def _extract_patches( - self, - region: np.ndarray, - grid_shape: Tuple[int, int] = (1, 1), - patch_size: Optional[Tuple[int, int]] = None, - dtype: DtypeLike = np.uint8, - ): - if patch_size is None and grid_shape == (1, 1): - return region - - n_patches = grid_shape[0] * grid_shape[1] - region_size = region.shape[1:] - - if patch_size is None: - patch_size = (region_size[0] // grid_shape[0], region_size[1] // grid_shape[1]) - - # split the region into patches on the grid and center crop them to patch size - flat_patch_grid = np.zeros((n_patches, 3, patch_size[0], patch_size[1]), dtype=dtype) - start_points = [ - np.round(region_size[i] * (0.5 + np.arange(grid_shape[i])) / grid_shape[i] - patch_size[i] / 2).astype(int) - for i in range(2) - ] - idx = 0 - for y_start in start_points[1]: - for x_start in start_points[0]: - x_end = x_start + patch_size[0] - y_end = y_start + patch_size[1] - flat_patch_grid[idx] = region[:, x_start:x_end, y_start:y_end] - idx += 1 - - return flat_patch_grid diff --git a/monai/data/wsi_reader.py b/monai/data/wsi_reader.py new file mode 100644 index 0000000000..4899fb8830 --- /dev/null +++ b/monai/data/wsi_reader.py @@ -0,0 +1,420 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import abstractmethod +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union + +import numpy as np + +from monai.config import DtypeLike, PathLike +from monai.data.image_reader import ImageReader, _stack_images +from monai.data.utils import is_supported_format +from monai.transforms.utility.array import EnsureChannelFirst +from monai.utils import ensure_tuple, optional_import, require_pkg + +CuImage, _ = optional_import("cucim", name="CuImage") + +__all__ = ["BaseWSIReader", "WSIReader", "CuCIMWSIReader"] + + +class BaseWSIReader(ImageReader): + """ + An abstract class that defines APIs to load patches from whole slide image files. + + Typical usage of a concrete implementation of this class is: + + .. code-block:: python + + image_reader = MyWSIReader() + wsi = image_reader.read(, **kwargs) + img_data, meta_data = image_reader.get_data(wsi) + + - The `read` call converts an image filename into whole slide image object, + - The `get_data` call fetches the image data, as well as meta data. + + The following methods needs to be implemented for any concrete implementation of this class: + + - `read` reads a whole slide image object from a given file + - `get_size` returns the size of the whole slide image of a given wsi object at a given level. + - `get_level_count` returns the number of levels in the whole slide image + - `get_patch` extracts and returns a patch image form the whole slide image + - `get_metadata` extracts and returns metadata for a whole slide image and a specific patch. + + + """ + + supported_suffixes: List[str] = [] + + def __init__(self, level: int, **kwargs): + super().__init__() + self.level = level + self.kwargs = kwargs + self.metadata: Dict[Any, Any] = {} + + @abstractmethod + def get_size(self, wsi, level: int) -> Tuple[int, int]: + """ + Returns the size of the whole slide image at a given level. + + Args: + wsi: a whole slide image object loaded from a file + level: the level number where the size is calculated + + """ + raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") + + @abstractmethod + def get_level_count(self, wsi) -> int: + """ + Returns the number of levels in the whole slide image. + + Args: + wsi: a whole slide image object loaded from a file + + """ + raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") + + @abstractmethod + def get_patch( + self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str + ) -> np.ndarray: + """ + Extracts and returns a patch image form the whole slide image. + + Args: + wsi: a whole slide image object loaded from a file or a lis of such objects + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). + size: (height, width) tuple giving the patch size at the given level (`level`). + If None, it is set to the full image size at the given level. + level: the level number. Defaults to 0 + dtype: the data type of output image + mode: the output image mode, 'RGB' or 'RGBA' + + """ + raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") + + @abstractmethod + def get_metadata(self, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int) -> Dict: + """ + Extracts and returns metadata form the whole slide image. + + Args: + patch: extracted patch from whole slide image + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). + size: (height, width) tuple giving the patch size at the given level (`level`). + If None, it is set to the full image size at the given level. + level: the level number. Defaults to 0 + + """ + raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") + + def get_data( + self, + wsi, + location: Tuple[int, int] = (0, 0), + size: Optional[Tuple[int, int]] = None, + level: Optional[int] = None, + dtype: DtypeLike = np.uint8, + mode: str = "RGB", + ) -> Tuple[np.ndarray, Dict]: + """ + Verifies inputs, extracts patches from WSI image and generates metadata, and return them. + + Args: + wsi: a whole slide image object loaded from a file or a list of such objects + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). + size: (height, width) tuple giving the patch size at the given level (`level`). + If None, it is set to the full image size at the given level. + level: the level number. Defaults to 0 + dtype: the data type of output image + mode: the output image mode, 'RGB' or 'RGBA' + + Returns: + a tuples, where the first element is an image patch [CxHxW] or stack of patches, + and second element is a dictionary of metadata + """ + patch_list: List = [] + metadata = {} + # CuImage object is iterable, so ensure_tuple won't work on single object + if not isinstance(wsi, List): + wsi = [wsi] + for each_wsi in ensure_tuple(wsi): + # Verify magnification level + if level is None: + level = self.level + max_level = self.get_level_count(each_wsi) - 1 + if level > max_level: + raise ValueError(f"The maximum level of this image is {max_level} while level={level} is requested)!") + + # Verify location + if location is None: + location = (0, 0) + wsi_size = self.get_size(each_wsi, level) + if location[0] > wsi_size[0] or location[1] > wsi_size[1]: + raise ValueError(f"Location is outside of the image: location={location}, image size={wsi_size}") + + # Verify size + if size is None: + if location != (0, 0): + raise ValueError("Patch size should be defined to exctract patches.") + size = self.get_size(each_wsi, level) + else: + if size[0] <= 0 or size[1] <= 0: + raise ValueError(f"Patch size should be greater than zero, provided: patch size = {size}") + + # Extract a patch or the entire image + patch = self.get_patch(each_wsi, location=location, size=size, level=level, dtype=dtype, mode=mode) + + # check if the image has three dimensions (2D + color) + if patch.ndim != 3: + raise ValueError( + f"The image dimension should be 3 but has {patch.ndim}. " + "`WSIReader` is designed to work only with 2D images with color channel." + ) + + # Create a list of patches + patch_list.append(patch) + + # Set patch-related metadata + each_meta = self.get_metadata(patch=patch, location=location, size=size, level=level) + metadata.update(each_meta) + + return _stack_images(patch_list, metadata), metadata + + def verify_suffix(self, filename: Union[Sequence[PathLike], PathLike]) -> bool: + """ + Verify whether the specified file or files format is supported by WSI reader. + + The list of supported suffixes are read from `self.supported_suffixes`. + + Args: + filename: filename or a list of filenames to read. + + """ + return is_supported_format(filename, self.supported_suffixes) + + +class WSIReader(BaseWSIReader): + """ + Read whole slide images and extract patches using different backend libraries + + Args: + backend: the name of backend whole slide image reader library, the default is cuCIM. + level: the level at which patches are extracted. + kwargs: additional arguments to be passed to the backend library + + """ + + def __init__(self, backend="cucim", level: int = 0, **kwargs): + super().__init__(level, **kwargs) + self.backend = backend.lower() + # Any new backend can be added below + if self.backend == "cucim": + self.reader = CuCIMWSIReader(level=level, **kwargs) + else: + raise ValueError("The supported backends are: cucim") + self.supported_suffixes = self.reader.supported_suffixes + + def get_level_count(self, wsi) -> int: + """ + Returns the number of levels in the whole slide image. + + Args: + wsi: a whole slide image object loaded from a file + + """ + return self.reader.get_level_count(wsi) + + def get_size(self, wsi, level) -> Tuple[int, int]: + """ + Returns the size of the whole slide image at a given level. + + Args: + wsi: a whole slide image object loaded from a file + level: the level number where the size is calculated + + """ + return self.reader.get_size(wsi, level) + + def get_metadata(self, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int) -> Dict: + """ + Extracts and returns metadata form the whole slide image. + + Args: + patch: extracted patch from whole slide image + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). + size: (height, width) tuple giving the patch size at the given level (`level`). + If None, it is set to the full image size at the given level. + level: the level number. Defaults to 0 + + """ + return self.reader.get_metadata(patch=patch, size=size, location=location, level=level) + + def get_patch( + self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str + ) -> np.ndarray: + """ + Extracts and returns a patch image form the whole slide image. + + Args: + wsi: a whole slide image object loaded from a file or a lis of such objects + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). + size: (height, width) tuple giving the patch size at the given level (`level`). + If None, it is set to the full image size at the given level. + level: the level number. Defaults to 0 + dtype: the data type of output image + mode: the output image mode, 'RGB' or 'RGBA' + + """ + return self.reader.get_patch(wsi=wsi, location=location, size=size, level=level, dtype=dtype, mode=mode) + + def read(self, data: Union[Sequence[PathLike], PathLike, np.ndarray], **kwargs): + """ + Read whole slide image objects from given file or list of files. + + Args: + data: file name or a list of file names to read. + kwargs: additional args for the reader module (overrides `self.kwargs` for existing keys). + + Returns: + whole slide image object or list of such objects + + """ + return self.reader.read(data=data, **kwargs) + + +@require_pkg(pkg_name="cucim") +class CuCIMWSIReader(BaseWSIReader): + """ + Read whole slide images and extract patches without loading the whole slide image into the memory. + + Args: + level: the whole slide image level at which the image is extracted. (default=0) + This is overridden if the level argument is provided in `get_data`. + kwargs: additional args for `cucim.CuImage` module: + https://github.com/rapidsai/cucim/blob/main/cpp/include/cucim/cuimage.h + + """ + + supported_suffixes = ["tif", "tiff", "svs"] + + def __init__(self, level: int = 0, **kwargs): + super().__init__(level, **kwargs) + + @staticmethod + def get_level_count(wsi) -> int: + """ + Returns the number of levels in the whole slide image. + + Args: + wsi: a whole slide image object loaded from a file + + """ + return wsi.resolutions["level_count"] # type: ignore + + @staticmethod + def get_size(wsi, level) -> Tuple[int, int]: + """ + Returns the size of the whole slide image at a given level. + + Args: + wsi: a whole slide image object loaded from a file + level: the level number where the size is calculated + + """ + return (wsi.resolutions["level_dimensions"][level][1], wsi.resolutions["level_dimensions"][level][0]) + + def get_metadata(self, patch: np.ndarray, location: Tuple[int, int], size: Tuple[int, int], level: int) -> Dict: + """ + Extracts and returns metadata form the whole slide image. + + Args: + patch: extracted patch from whole slide image + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). + size: (height, width) tuple giving the patch size at the given level (`level`). + If None, it is set to the full image size at the given level. + level: the level number. Defaults to 0 + + """ + metadata: Dict = { + "backend": "cucim", + "spatial_shape": np.asarray(patch.shape[1:]), + "original_channel_dim": 0, + "location": location, + "size": size, + "level": level, + } + return metadata + + def read(self, data: Union[Sequence[PathLike], PathLike, np.ndarray], **kwargs): + """ + Read whole slide image objects from given file or list of files. + + Args: + data: file name or a list of file names to read. + kwargs: additional args that overrides `self.kwargs` for existing keys. + For more details look at https://github.com/rapidsai/cucim/blob/main/cpp/include/cucim/cuimage.h + + Returns: + whole slide image object or list of such objects + + """ + wsi_list: List = [] + + filenames: Sequence[PathLike] = ensure_tuple(data) + kwargs_ = self.kwargs.copy() + kwargs_.update(kwargs) + for filename in filenames: + wsi = CuImage(filename, **kwargs_) + wsi_list.append(wsi) + + return wsi_list if len(filenames) > 1 else wsi_list[0] + + def get_patch( + self, wsi, location: Tuple[int, int], size: Tuple[int, int], level: int, dtype: DtypeLike, mode: str + ) -> np.ndarray: + """ + Extracts and returns a patch image form the whole slide image. + + Args: + wsi: a whole slide image object loaded from a file or a lis of such objects + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame. Defaults to (0, 0). + size: (height, width) tuple giving the patch size at the given level (`level`). + If None, it is set to the full image size at the given level. + level: the level number. Defaults to 0 + dtype: the data type of output image + mode: the output image mode, 'RGB' or 'RGBA' + + """ + # Extract a patch or the entire image + # (reverse the order of location and size to become WxH for cuCIM) + patch: np.ndarray = wsi.read_region(location=location[::-1], size=size[::-1], level=level) + + # Convert to numpy + patch = np.asarray(patch, dtype=dtype) + + # Make it channel first + patch = EnsureChannelFirst()(patch, {"original_channel_dim": -1}) # type: ignore + + # Check if the color channel is 3 (RGB) or 4 (RGBA) + if mode == "RGBA" and patch.shape[0] != 4: + raise ValueError( + f"The image is expected to have four color channels in '{mode}' mode but has {patch.shape[0]}." + ) + + if mode in "RGB": + if patch.shape[0] not in [3, 4]: + raise ValueError( + f"The image is expected to have three or four color channels in '{mode}' mode but has {patch.shape[0]}. " + ) + patch = patch[:3] + + return patch diff --git a/tests/test_wsireader.py b/tests/test_wsireader.py index 6ee02143b8..7b288f6040 100644 --- a/tests/test_wsireader.py +++ b/tests/test_wsireader.py @@ -19,7 +19,7 @@ from parameterized import parameterized from monai.data import DataLoader, Dataset -from monai.data.image_reader import WSIReader +from monai.data.wsi_reader import WSIReader from monai.transforms import Compose, LoadImaged, ToTensord from monai.utils import first, optional_import from monai.utils.enums import PostFix @@ -57,29 +57,17 @@ ] TEST_CASE_3 = [ - FILE_PATH, - {"location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 2}, - np.array( + [FILE_PATH, FILE_PATH], + {"location": (0, 0), "size": (2, 1), "level": 2}, + np.concatenate( [ - [[[239, 239], [239, 239]], [[239, 239], [239, 239]], [[239, 239], [239, 239]]], - [[[242, 242], [242, 243]], [[242, 242], [242, 243]], [[242, 242], [242, 243]]], - ] + np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), + np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), + ], + axis=0, ), ] -TEST_CASE_4 = [ - FILE_PATH, - {"location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, - np.array([[[[239]], [[239]], [[239]]], [[[243]], [[243]], [[243]]]]), -] - -TEST_CASE_5 = [ - FILE_PATH, - {"location": (HEIGHT - 2, WIDTH - 2), "level": 0, "grid_shape": (1, 1)}, - np.array([[[239, 239], [239, 239]], [[239, 239], [239, 239]], [[237, 237], [237, 237]]]), -] - - TEST_CASE_RGB_0 = [np.ones((3, 2, 2), dtype=np.uint8)] # CHW TEST_CASE_RGB_1 = [np.ones((3, 100, 100), dtype=np.uint8)] # CHW @@ -138,7 +126,7 @@ def test_read_whole_image(self, file_path, level, expected_shape): img = reader.get_data(img_obj)[0] self.assertTupleEqual(img.shape, expected_shape) - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_5]) + @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) def test_read_region(self, file_path, patch_info, expected_img): kwargs = {"name": None, "offset": None} if self.backend == "tifffile" else {} reader = WSIReader(self.backend, **kwargs) @@ -155,17 +143,22 @@ def test_read_region(self, file_path, patch_info, expected_img): self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) - @parameterized.expand([TEST_CASE_3, TEST_CASE_4]) - def test_read_patches(self, file_path, patch_info, expected_img): - reader = WSIReader(self.backend) - with reader.read(file_path) as img_obj: - if self.backend == "tifffile": - with self.assertRaises(ValueError): - reader.get_data(img_obj, **patch_info)[0] - else: - img = reader.get_data(img_obj, **patch_info)[0] - self.assertTupleEqual(img.shape, expected_img.shape) - self.assertIsNone(assert_array_equal(img, expected_img)) + @parameterized.expand([TEST_CASE_3]) + def test_read_region_multi_wsi(self, file_path, patch_info, expected_img): + kwargs = {"name": None, "offset": None} if self.backend == "tifffile" else {} + reader = WSIReader(self.backend, **kwargs) + img_obj = reader.read(file_path, **kwargs) + if self.backend == "tifffile": + with self.assertRaises(ValueError): + reader.get_data(img_obj, **patch_info)[0] + else: + # Read twice to check multiple calls + img = reader.get_data(img_obj, **patch_info)[0] + img2 = reader.get_data(img_obj, **patch_info)[0] + self.assertTupleEqual(img.shape, img2.shape) + self.assertIsNone(assert_array_equal(img, img2)) + self.assertTupleEqual(img.shape, expected_img.shape) + self.assertIsNone(assert_array_equal(img, expected_img)) @parameterized.expand([TEST_CASE_RGB_0, TEST_CASE_RGB_1]) @skipUnless(has_tiff, "Requires tifffile.") @@ -221,19 +214,5 @@ def setUpClass(cls): cls.backend = "cucim" -@skipUnless(has_osl, "Requires OpenSlide") -class TestOpenSlide(WSIReaderTests.Tests): - @classmethod - def setUpClass(cls): - cls.backend = "openslide" - - -@skipUnless(has_tiff, "Requires TiffFile") -class TestTiffFile(WSIReaderTests.Tests): - @classmethod - def setUpClass(cls): - cls.backend = "tifffile" - - if __name__ == "__main__": unittest.main() From a592482972d2a9b7b7f934dfd940110cdcacc878 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 19 Apr 2022 22:09:16 +0000 Subject: [PATCH 23/26] Bring back previous WSIReader for backward compatibility Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/image_reader.py | 267 ++++++++++++++++++++++++++++++++++++- 1 file changed, 265 insertions(+), 2 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index f5d7fdef9d..ca77178e0b 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -19,7 +19,8 @@ from monai.config import DtypeLike, KeysCollection, PathLike from monai.data.utils import correct_nifti_header_if_necessary, is_supported_format, orientation_ras_lps -from monai.utils import ensure_tuple, optional_import, require_pkg +from monai.transforms.utility.array import EnsureChannelFirst +from monai.utils import ensure_tuple, ensure_tuple_rep, optional_import, require_pkg if TYPE_CHECKING: import itk @@ -38,7 +39,7 @@ CuImage, _ = optional_import("cucim", name="CuImage") TiffFile, _ = optional_import("tifffile", name="TiffFile") -__all__ = ["ImageReader", "ITKReader", "NibabelReader", "NumpyReader", "PILReader"] +__all__ = ["ImageReader", "ITKReader", "NibabelReader", "NumpyReader", "PILReader", "WSIReader"] class ImageReader(ABC): @@ -713,3 +714,265 @@ def _get_spatial_shape(self, img): img: a PIL Image object loaded from an image file. """ return np.asarray((img.width, img.height)) + + +class WSIReader(ImageReader): + """ + Read whole slide images and extract patches. + + Args: + backend: backend library to load the images, available options: "cuCIM", "OpenSlide" and "TiffFile". + level: the whole slide image level at which the image is extracted. (default=0) + This is overridden if the level argument is provided in `get_data`. + kwargs: additional args for backend reading API in `read()`, more details in `cuCIM`, `TiffFile`, `OpenSlide`: + https://github.com/rapidsai/cucim/blob/v21.12.00/cpp/include/cucim/cuimage.h#L100. + https://github.com/cgohlke/tifffile. + https://openslide.org/api/python/#openslide.OpenSlide. + + Note: + While "cuCIM" and "OpenSlide" backends both can load patches from large whole slide images + without loading the entire image into memory, "TiffFile" backend needs to load the entire image into memory + before extracting any patch; thus, memory consideration is needed when using "TiffFile" backend for + patch extraction. + + """ + + def __init__(self, backend: str = "OpenSlide", level: int = 0, **kwargs): + super().__init__() + self.backend = backend.lower() + func = require_pkg(self.backend)(self._set_reader) + self.wsi_reader = func(self.backend) + self.level = level + self.kwargs = kwargs + + @staticmethod + def _set_reader(backend: str): + if backend == "openslide": + return OpenSlide + if backend == "cucim": + return CuImage + if backend == "tifffile": + return TiffFile + raise ValueError("`backend` should be 'cuCIM', 'OpenSlide' or 'TiffFile'.") + + def verify_suffix(self, filename: Union[Sequence[PathLike], PathLike]) -> bool: + """ + Verify whether the specified file or files format is supported by WSI reader. + + Args: + filename: file name or a list of file names to read. + if a list of files, verify all the suffixes. + """ + return is_supported_format(filename, ["tif", "tiff"]) + + def read(self, data: Union[Sequence[PathLike], PathLike, np.ndarray], **kwargs): + """ + Read image data from given file or list of files. + + Args: + data: file name or a list of file names to read. + kwargs: additional args for backend reading API in `read()`, will override `self.kwargs` for existing keys. + more details in `cuCIM`, `TiffFile`, `OpenSlide`: + https://github.com/rapidsai/cucim/blob/v21.12.00/cpp/include/cucim/cuimage.h#L100. + https://github.com/cgohlke/tifffile. + https://openslide.org/api/python/#openslide.OpenSlide. + + Returns: + image object or list of image objects + + """ + img_: List = [] + + filenames: Sequence[PathLike] = ensure_tuple(data) + kwargs_ = self.kwargs.copy() + kwargs_.update(kwargs) + for name in filenames: + img = self.wsi_reader(name, **kwargs_) + if self.backend == "openslide": + img.shape = (img.dimensions[1], img.dimensions[0], 3) + img_.append(img) + + return img_ if len(filenames) > 1 else img_[0] + + def get_data( + self, + img, + location: Tuple[int, int] = (0, 0), + size: Optional[Tuple[int, int]] = None, + level: Optional[int] = None, + dtype: DtypeLike = np.uint8, + grid_shape: Tuple[int, int] = (1, 1), + patch_size: Optional[Union[int, Tuple[int, int]]] = None, + ): + """ + Extract regions as numpy array from WSI image and return them. + + Args: + img: a WSIReader image object loaded from a file, or list of CuImage objects + location: (x_min, y_min) tuple giving the top left pixel in the level 0 reference frame, + or list of tuples (default=(0, 0)) + size: (height, width) tuple giving the region size, or list of tuples (default to full image size) + This is the size of image at the given level (`level`) + level: the level number, or list of level numbers (default=0) + dtype: the data type of output image + grid_shape: (row, columns) tuple define a grid to extract patches on that + patch_size: (height, width) the size of extracted patches at the given level + """ + # Verify inputs + if level is None: + level = self.level + max_level = self._get_max_level(img) + if level > max_level: + raise ValueError(f"The maximum level of this image is {max_level} while level={level} is requested)!") + + # Extract a region or the entire image + region = self._extract_region(img, location=location, size=size, level=level, dtype=dtype) + + # Add necessary metadata + metadata: Dict = {} + metadata["spatial_shape"] = np.asarray(region.shape[:-1]) + metadata["original_channel_dim"] = -1 + + # Make it channel first + region = EnsureChannelFirst()(region, metadata) + + # Split into patches + if patch_size is None: + patches = region + else: + tuple_patch_size = ensure_tuple_rep(patch_size, 2) + patches = self._extract_patches( + region, patch_size=tuple_patch_size, grid_shape=grid_shape, dtype=dtype # type: ignore + ) + + return patches, metadata + + def _get_max_level(self, img_obj): + """ + Return the maximum number of levels in the whole slide image + Args: + img: the whole slide image object + + """ + if self.backend == "openslide": + return img_obj.level_count - 1 + if self.backend == "cucim": + return img_obj.resolutions["level_count"] - 1 + if self.backend == "tifffile": + return len(img_obj.pages) - 1 + + def _get_image_size(self, img, size, level, location): + """ + Calculate the maximum region size for the given level and starting location (if size is None). + Note that region size in OpenSlide and cuCIM are WxH (but the final image output would be HxW) + """ + if size is not None: + return size[::-1] + + max_size = [] + downsampling_factor = [] + if self.backend == "openslide": + downsampling_factor = img.level_downsamples[level] + max_size = img.level_dimensions[level] + elif self.backend == "cucim": + downsampling_factor = img.resolutions["level_downsamples"][level] + max_size = img.resolutions["level_dimensions"][level] + + # subtract the top left corner of the patch (at given level) from maximum size + location_at_level = (round(location[1] / downsampling_factor), round(location[0] / downsampling_factor)) + size = [max_size[i] - location_at_level[i] for i in range(len(max_size))] + + return size + + def _extract_region( + self, + img_obj, + size: Optional[Tuple[int, int]], + location: Tuple[int, int] = (0, 0), + level: int = 0, + dtype: DtypeLike = np.uint8, + ): + if self.backend == "tifffile": + # Read the entire image + if size is not None: + raise ValueError( + f"TiffFile backend reads the entire image only, so size '{size}'' should not be provided!", + "For more flexibility or extracting regions, please use cuCIM or OpenSlide backend.", + ) + if location != (0, 0): + raise ValueError( + f"TiffFile backend reads the entire image only, so location '{location}' should not be provided!", + "For more flexibility and extracting regions, please use cuCIM or OpenSlide backend.", + ) + region = img_obj.asarray(level=level) + else: + # Get region size to be extracted + region_size = self._get_image_size(img_obj, size, level, location) + # reverse the order of location's dimensions to become WxH (for cuCIM and OpenSlide) + region_location = location[::-1] + # Extract a region (or the entire image) + region = img_obj.read_region(location=region_location, size=region_size, level=level) + + region = self.convert_to_rgb_array(region, dtype) + return region + + def convert_to_rgb_array(self, raw_region, dtype: DtypeLike = np.uint8): + """Convert to RGB mode and numpy array""" + if self.backend == "openslide": + # convert to RGB + raw_region = raw_region.convert("RGB") + + # convert to numpy (if not already in numpy) + raw_region = np.asarray(raw_region, dtype=dtype) + + # check if the image has three dimensions (2D + color) + if raw_region.ndim != 3: + raise ValueError( + f"The input image dimension should be 3 but {raw_region.ndim} is given. " + "`WSIReader` is designed to work only with 2D colored images." + ) + + # check if the color channel is 3 (RGB) or 4 (RGBA) + if raw_region.shape[-1] not in [3, 4]: + raise ValueError( + f"There should be three or four color channels but {raw_region.shape[-1]} is given. " + "`WSIReader` is designed to work only with 2D colored images." + ) + + # remove alpha channel if exist (RGBA) + if raw_region.shape[-1] > 3: + raw_region = raw_region[..., :3] + + return raw_region + + def _extract_patches( + self, + region: np.ndarray, + grid_shape: Tuple[int, int] = (1, 1), + patch_size: Optional[Tuple[int, int]] = None, + dtype: DtypeLike = np.uint8, + ): + if patch_size is None and grid_shape == (1, 1): + return region + + n_patches = grid_shape[0] * grid_shape[1] + region_size = region.shape[1:] + + if patch_size is None: + patch_size = (region_size[0] // grid_shape[0], region_size[1] // grid_shape[1]) + + # split the region into patches on the grid and center crop them to patch size + flat_patch_grid = np.zeros((n_patches, 3, patch_size[0], patch_size[1]), dtype=dtype) + start_points = [ + np.round(region_size[i] * (0.5 + np.arange(grid_shape[i])) / grid_shape[i] - patch_size[i] / 2).astype(int) + for i in range(2) + ] + idx = 0 + for y_start in start_points[1]: + for x_start in start_points[0]: + x_end = x_start + patch_size[0] + y_end = y_start + patch_size[1] + flat_patch_grid[idx] = region[:, x_start:x_end, y_start:y_end] + idx += 1 + + return flat_patch_grid From 41337db5d460c3dece37ad78088d947b2240b8e9 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 19 Apr 2022 22:15:27 +0000 Subject: [PATCH 24/26] Revert to legacy WSIReader Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/apps/pathology/data/datasets.py | 2 +- monai/apps/pathology/metrics/lesion_froc.py | 2 +- tests/min_tests.py | 1 + tests/test_wsireader_deprecated.py | 239 ++++++++++++++++++++ 4 files changed, 242 insertions(+), 2 deletions(-) create mode 100644 tests/test_wsireader_deprecated.py diff --git a/monai/apps/pathology/data/datasets.py b/monai/apps/pathology/data/datasets.py index 756223a784..71f3214ea4 100644 --- a/monai/apps/pathology/data/datasets.py +++ b/monai/apps/pathology/data/datasets.py @@ -16,7 +16,7 @@ import numpy as np from monai.data import Dataset, SmartCacheDataset -from monai.data.wsi_reader import WSIReader +from monai.data.image_reader import WSIReader from monai.utils import ensure_tuple_rep __all__ = ["PatchWSIDataset", "SmartCachePatchWSIDataset", "MaskedInferenceWSIDataset"] diff --git a/monai/apps/pathology/metrics/lesion_froc.py b/monai/apps/pathology/metrics/lesion_froc.py index e48f2128fe..6073bd0cda 100644 --- a/monai/apps/pathology/metrics/lesion_froc.py +++ b/monai/apps/pathology/metrics/lesion_froc.py @@ -14,7 +14,7 @@ import numpy as np from monai.apps.pathology.utils import PathologyProbNMS, compute_isolated_tumor_cells, compute_multi_instance_mask -from monai.data.wsi_reader import WSIReader +from monai.data.image_reader import WSIReader from monai.metrics import compute_fp_tp_probs, compute_froc_curve_data, compute_froc_score from monai.utils import min_version, optional_import diff --git a/tests/min_tests.py b/tests/min_tests.py index 66b6c9ff3d..074a4b6861 100644 --- a/tests/min_tests.py +++ b/tests/min_tests.py @@ -157,6 +157,7 @@ def run_testsuit(): "test_vitautoenc", "test_write_metrics_reports", "test_wsireader", + "test_wsireader_deprecated", "test_zoom", "test_zoom_affine", "test_zoomd", diff --git a/tests/test_wsireader_deprecated.py b/tests/test_wsireader_deprecated.py new file mode 100644 index 0000000000..6ee02143b8 --- /dev/null +++ b/tests/test_wsireader_deprecated.py @@ -0,0 +1,239 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest +from unittest import skipUnless + +import numpy as np +import torch +from numpy.testing import assert_array_equal +from parameterized import parameterized + +from monai.data import DataLoader, Dataset +from monai.data.image_reader import WSIReader +from monai.transforms import Compose, LoadImaged, ToTensord +from monai.utils import first, optional_import +from monai.utils.enums import PostFix +from tests.utils import download_url_or_skip_test, testing_data_config + +cucim, has_cucim = optional_import("cucim") +has_cucim = has_cucim and hasattr(cucim, "CuImage") +openslide, has_osl = optional_import("openslide") +imwrite, has_tiff = optional_import("tifffile", name="imwrite") +_, has_codec = optional_import("imagecodecs") +has_tiff = has_tiff and has_codec + +FILE_KEY = "wsi_img" +FILE_URL = testing_data_config("images", FILE_KEY, "url") +base_name, extension = os.path.basename(f"{FILE_URL}"), ".tiff" +FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + base_name + extension) + +HEIGHT = 32914 +WIDTH = 46000 + +TEST_CASE_0 = [FILE_PATH, 2, (3, HEIGHT // 4, WIDTH // 4)] + +TEST_CASE_TRANSFORM_0 = [FILE_PATH, 4, (HEIGHT // 16, WIDTH // 16), (1, 3, HEIGHT // 16, WIDTH // 16)] + +TEST_CASE_1 = [ + FILE_PATH, + {"location": (HEIGHT // 2, WIDTH // 2), "size": (2, 1), "level": 0}, + np.array([[[246], [246]], [[246], [246]], [[246], [246]]]), +] + +TEST_CASE_2 = [ + FILE_PATH, + {"location": (0, 0), "size": (2, 1), "level": 2}, + np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), +] + +TEST_CASE_3 = [ + FILE_PATH, + {"location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 2}, + np.array( + [ + [[[239, 239], [239, 239]], [[239, 239], [239, 239]], [[239, 239], [239, 239]]], + [[[242, 242], [242, 243]], [[242, 242], [242, 243]], [[242, 242], [242, 243]]], + ] + ), +] + +TEST_CASE_4 = [ + FILE_PATH, + {"location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, + np.array([[[[239]], [[239]], [[239]]], [[[243]], [[243]], [[243]]]]), +] + +TEST_CASE_5 = [ + FILE_PATH, + {"location": (HEIGHT - 2, WIDTH - 2), "level": 0, "grid_shape": (1, 1)}, + np.array([[[239, 239], [239, 239]], [[239, 239], [239, 239]], [[237, 237], [237, 237]]]), +] + + +TEST_CASE_RGB_0 = [np.ones((3, 2, 2), dtype=np.uint8)] # CHW + +TEST_CASE_RGB_1 = [np.ones((3, 100, 100), dtype=np.uint8)] # CHW + +TEST_CASE_ERROR_GRAY = [np.ones((16, 16), dtype=np.uint8)] # no color channel +TEST_CASE_ERROR_3D = [np.ones((16, 16, 16, 3), dtype=np.uint8)] # 3D + color + + +def save_rgba_tiff(array: np.ndarray, filename: str, mode: str): + """ + Save numpy array into a TIFF RGB/RGBA file + + Args: + array: numpy ndarray with the shape of CxHxW and C==3 representing a RGB image + filename: the filename to be used for the tiff file. '_RGB.tiff' or '_RGBA.tiff' will be appended to this filename. + mode: RGB or RGBA + """ + if mode == "RGBA": + array = np.concatenate([array, 255 * np.ones_like(array[0])[np.newaxis]]).astype(np.uint8) + + img_rgb = array.transpose(1, 2, 0) + imwrite(filename, img_rgb, shape=img_rgb.shape, tile=(16, 16)) + + return filename + + +def save_gray_tiff(array: np.ndarray, filename: str): + """ + Save numpy array into a TIFF file + + Args: + array: numpy ndarray with any shape + filename: the filename to be used for the tiff file. + """ + img_gray = array + imwrite(filename, img_gray, shape=img_gray.shape, photometric="rgb") + + return filename + + +@skipUnless(has_cucim or has_osl or has_tiff, "Requires cucim, openslide, or tifffile!") +def setUpModule(): # noqa: N802 + hash_type = testing_data_config("images", FILE_KEY, "hash_type") + hash_val = testing_data_config("images", FILE_KEY, "hash_val") + download_url_or_skip_test(FILE_URL, FILE_PATH, hash_type=hash_type, hash_val=hash_val) + + +class WSIReaderTests: + class Tests(unittest.TestCase): + backend = None + + @parameterized.expand([TEST_CASE_0]) + def test_read_whole_image(self, file_path, level, expected_shape): + reader = WSIReader(self.backend, level=level) + with reader.read(file_path) as img_obj: + img = reader.get_data(img_obj)[0] + self.assertTupleEqual(img.shape, expected_shape) + + @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_5]) + def test_read_region(self, file_path, patch_info, expected_img): + kwargs = {"name": None, "offset": None} if self.backend == "tifffile" else {} + reader = WSIReader(self.backend, **kwargs) + with reader.read(file_path, **kwargs) as img_obj: + if self.backend == "tifffile": + with self.assertRaises(ValueError): + reader.get_data(img_obj, **patch_info)[0] + else: + # Read twice to check multiple calls + img = reader.get_data(img_obj, **patch_info)[0] + img2 = reader.get_data(img_obj, **patch_info)[0] + self.assertTupleEqual(img.shape, img2.shape) + self.assertIsNone(assert_array_equal(img, img2)) + self.assertTupleEqual(img.shape, expected_img.shape) + self.assertIsNone(assert_array_equal(img, expected_img)) + + @parameterized.expand([TEST_CASE_3, TEST_CASE_4]) + def test_read_patches(self, file_path, patch_info, expected_img): + reader = WSIReader(self.backend) + with reader.read(file_path) as img_obj: + if self.backend == "tifffile": + with self.assertRaises(ValueError): + reader.get_data(img_obj, **patch_info)[0] + else: + img = reader.get_data(img_obj, **patch_info)[0] + self.assertTupleEqual(img.shape, expected_img.shape) + self.assertIsNone(assert_array_equal(img, expected_img)) + + @parameterized.expand([TEST_CASE_RGB_0, TEST_CASE_RGB_1]) + @skipUnless(has_tiff, "Requires tifffile.") + def test_read_rgba(self, img_expected): + # skip for OpenSlide since not working with images without tiles + if self.backend == "openslide": + return + image = {} + reader = WSIReader(self.backend) + for mode in ["RGB", "RGBA"]: + file_path = save_rgba_tiff( + img_expected, + os.path.join(os.path.dirname(__file__), "testing_data", f"temp_tiff_image_{mode}.tiff"), + mode=mode, + ) + with reader.read(file_path) as img_obj: + image[mode], _ = reader.get_data(img_obj) + + self.assertIsNone(assert_array_equal(image["RGB"], img_expected)) + self.assertIsNone(assert_array_equal(image["RGBA"], img_expected)) + + @parameterized.expand([TEST_CASE_ERROR_GRAY, TEST_CASE_ERROR_3D]) + @skipUnless(has_tiff, "Requires tifffile.") + def test_read_malformats(self, img_expected): + reader = WSIReader(self.backend) + file_path = save_gray_tiff( + img_expected, os.path.join(os.path.dirname(__file__), "testing_data", "temp_tiff_image_gray.tiff") + ) + with self.assertRaises((RuntimeError, ValueError, openslide.OpenSlideError if has_osl else ValueError)): + with reader.read(file_path) as img_obj: + reader.get_data(img_obj) + + @parameterized.expand([TEST_CASE_TRANSFORM_0]) + def test_with_dataloader(self, file_path, level, expected_spatial_shape, expected_shape): + train_transform = Compose( + [ + LoadImaged(keys=["image"], reader=WSIReader, backend=self.backend, level=level), + ToTensord(keys=["image"]), + ] + ) + dataset = Dataset([{"image": file_path}], transform=train_transform) + data_loader = DataLoader(dataset) + data: dict = first(data_loader) + for s in data[PostFix.meta("image")]["spatial_shape"]: + torch.testing.assert_allclose(s, expected_spatial_shape) + self.assertTupleEqual(data["image"].shape, expected_shape) + + +@skipUnless(has_cucim, "Requires cucim") +class TestCuCIM(WSIReaderTests.Tests): + @classmethod + def setUpClass(cls): + cls.backend = "cucim" + + +@skipUnless(has_osl, "Requires OpenSlide") +class TestOpenSlide(WSIReaderTests.Tests): + @classmethod + def setUpClass(cls): + cls.backend = "openslide" + + +@skipUnless(has_tiff, "Requires TiffFile") +class TestTiffFile(WSIReaderTests.Tests): + @classmethod + def setUpClass(cls): + cls.backend = "tifffile" + + +if __name__ == "__main__": + unittest.main() From daec9c7e7255a07c7a84b0298db9648da8e2ffc3 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 19 Apr 2022 22:27:20 +0000 Subject: [PATCH 25/26] Rename test files Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_wsireader.py | 71 ++++++++++++------- ...er_deprecated.py => test_wsireader_new.py} | 71 +++++++------------ 2 files changed, 71 insertions(+), 71 deletions(-) rename tests/{test_wsireader_deprecated.py => test_wsireader_new.py} (80%) diff --git a/tests/test_wsireader.py b/tests/test_wsireader.py index 7b288f6040..6ee02143b8 100644 --- a/tests/test_wsireader.py +++ b/tests/test_wsireader.py @@ -19,7 +19,7 @@ from parameterized import parameterized from monai.data import DataLoader, Dataset -from monai.data.wsi_reader import WSIReader +from monai.data.image_reader import WSIReader from monai.transforms import Compose, LoadImaged, ToTensord from monai.utils import first, optional_import from monai.utils.enums import PostFix @@ -57,17 +57,29 @@ ] TEST_CASE_3 = [ - [FILE_PATH, FILE_PATH], - {"location": (0, 0), "size": (2, 1), "level": 2}, - np.concatenate( + FILE_PATH, + {"location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 2}, + np.array( [ - np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), - np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), - ], - axis=0, + [[[239, 239], [239, 239]], [[239, 239], [239, 239]], [[239, 239], [239, 239]]], + [[[242, 242], [242, 243]], [[242, 242], [242, 243]], [[242, 242], [242, 243]]], + ] ), ] +TEST_CASE_4 = [ + FILE_PATH, + {"location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, + np.array([[[[239]], [[239]], [[239]]], [[[243]], [[243]], [[243]]]]), +] + +TEST_CASE_5 = [ + FILE_PATH, + {"location": (HEIGHT - 2, WIDTH - 2), "level": 0, "grid_shape": (1, 1)}, + np.array([[[239, 239], [239, 239]], [[239, 239], [239, 239]], [[237, 237], [237, 237]]]), +] + + TEST_CASE_RGB_0 = [np.ones((3, 2, 2), dtype=np.uint8)] # CHW TEST_CASE_RGB_1 = [np.ones((3, 100, 100), dtype=np.uint8)] # CHW @@ -126,7 +138,7 @@ def test_read_whole_image(self, file_path, level, expected_shape): img = reader.get_data(img_obj)[0] self.assertTupleEqual(img.shape, expected_shape) - @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) + @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_5]) def test_read_region(self, file_path, patch_info, expected_img): kwargs = {"name": None, "offset": None} if self.backend == "tifffile" else {} reader = WSIReader(self.backend, **kwargs) @@ -143,22 +155,17 @@ def test_read_region(self, file_path, patch_info, expected_img): self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) - @parameterized.expand([TEST_CASE_3]) - def test_read_region_multi_wsi(self, file_path, patch_info, expected_img): - kwargs = {"name": None, "offset": None} if self.backend == "tifffile" else {} - reader = WSIReader(self.backend, **kwargs) - img_obj = reader.read(file_path, **kwargs) - if self.backend == "tifffile": - with self.assertRaises(ValueError): - reader.get_data(img_obj, **patch_info)[0] - else: - # Read twice to check multiple calls - img = reader.get_data(img_obj, **patch_info)[0] - img2 = reader.get_data(img_obj, **patch_info)[0] - self.assertTupleEqual(img.shape, img2.shape) - self.assertIsNone(assert_array_equal(img, img2)) - self.assertTupleEqual(img.shape, expected_img.shape) - self.assertIsNone(assert_array_equal(img, expected_img)) + @parameterized.expand([TEST_CASE_3, TEST_CASE_4]) + def test_read_patches(self, file_path, patch_info, expected_img): + reader = WSIReader(self.backend) + with reader.read(file_path) as img_obj: + if self.backend == "tifffile": + with self.assertRaises(ValueError): + reader.get_data(img_obj, **patch_info)[0] + else: + img = reader.get_data(img_obj, **patch_info)[0] + self.assertTupleEqual(img.shape, expected_img.shape) + self.assertIsNone(assert_array_equal(img, expected_img)) @parameterized.expand([TEST_CASE_RGB_0, TEST_CASE_RGB_1]) @skipUnless(has_tiff, "Requires tifffile.") @@ -214,5 +221,19 @@ def setUpClass(cls): cls.backend = "cucim" +@skipUnless(has_osl, "Requires OpenSlide") +class TestOpenSlide(WSIReaderTests.Tests): + @classmethod + def setUpClass(cls): + cls.backend = "openslide" + + +@skipUnless(has_tiff, "Requires TiffFile") +class TestTiffFile(WSIReaderTests.Tests): + @classmethod + def setUpClass(cls): + cls.backend = "tifffile" + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_wsireader_deprecated.py b/tests/test_wsireader_new.py similarity index 80% rename from tests/test_wsireader_deprecated.py rename to tests/test_wsireader_new.py index 6ee02143b8..7b288f6040 100644 --- a/tests/test_wsireader_deprecated.py +++ b/tests/test_wsireader_new.py @@ -19,7 +19,7 @@ from parameterized import parameterized from monai.data import DataLoader, Dataset -from monai.data.image_reader import WSIReader +from monai.data.wsi_reader import WSIReader from monai.transforms import Compose, LoadImaged, ToTensord from monai.utils import first, optional_import from monai.utils.enums import PostFix @@ -57,29 +57,17 @@ ] TEST_CASE_3 = [ - FILE_PATH, - {"location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 2}, - np.array( + [FILE_PATH, FILE_PATH], + {"location": (0, 0), "size": (2, 1), "level": 2}, + np.concatenate( [ - [[[239, 239], [239, 239]], [[239, 239], [239, 239]], [[239, 239], [239, 239]]], - [[[242, 242], [242, 243]], [[242, 242], [242, 243]], [[242, 242], [242, 243]]], - ] + np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), + np.array([[[239], [239]], [[239], [239]], [[239], [239]]]), + ], + axis=0, ), ] -TEST_CASE_4 = [ - FILE_PATH, - {"location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, - np.array([[[[239]], [[239]], [[239]]], [[[243]], [[243]], [[243]]]]), -] - -TEST_CASE_5 = [ - FILE_PATH, - {"location": (HEIGHT - 2, WIDTH - 2), "level": 0, "grid_shape": (1, 1)}, - np.array([[[239, 239], [239, 239]], [[239, 239], [239, 239]], [[237, 237], [237, 237]]]), -] - - TEST_CASE_RGB_0 = [np.ones((3, 2, 2), dtype=np.uint8)] # CHW TEST_CASE_RGB_1 = [np.ones((3, 100, 100), dtype=np.uint8)] # CHW @@ -138,7 +126,7 @@ def test_read_whole_image(self, file_path, level, expected_shape): img = reader.get_data(img_obj)[0] self.assertTupleEqual(img.shape, expected_shape) - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_5]) + @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) def test_read_region(self, file_path, patch_info, expected_img): kwargs = {"name": None, "offset": None} if self.backend == "tifffile" else {} reader = WSIReader(self.backend, **kwargs) @@ -155,17 +143,22 @@ def test_read_region(self, file_path, patch_info, expected_img): self.assertTupleEqual(img.shape, expected_img.shape) self.assertIsNone(assert_array_equal(img, expected_img)) - @parameterized.expand([TEST_CASE_3, TEST_CASE_4]) - def test_read_patches(self, file_path, patch_info, expected_img): - reader = WSIReader(self.backend) - with reader.read(file_path) as img_obj: - if self.backend == "tifffile": - with self.assertRaises(ValueError): - reader.get_data(img_obj, **patch_info)[0] - else: - img = reader.get_data(img_obj, **patch_info)[0] - self.assertTupleEqual(img.shape, expected_img.shape) - self.assertIsNone(assert_array_equal(img, expected_img)) + @parameterized.expand([TEST_CASE_3]) + def test_read_region_multi_wsi(self, file_path, patch_info, expected_img): + kwargs = {"name": None, "offset": None} if self.backend == "tifffile" else {} + reader = WSIReader(self.backend, **kwargs) + img_obj = reader.read(file_path, **kwargs) + if self.backend == "tifffile": + with self.assertRaises(ValueError): + reader.get_data(img_obj, **patch_info)[0] + else: + # Read twice to check multiple calls + img = reader.get_data(img_obj, **patch_info)[0] + img2 = reader.get_data(img_obj, **patch_info)[0] + self.assertTupleEqual(img.shape, img2.shape) + self.assertIsNone(assert_array_equal(img, img2)) + self.assertTupleEqual(img.shape, expected_img.shape) + self.assertIsNone(assert_array_equal(img, expected_img)) @parameterized.expand([TEST_CASE_RGB_0, TEST_CASE_RGB_1]) @skipUnless(has_tiff, "Requires tifffile.") @@ -221,19 +214,5 @@ def setUpClass(cls): cls.backend = "cucim" -@skipUnless(has_osl, "Requires OpenSlide") -class TestOpenSlide(WSIReaderTests.Tests): - @classmethod - def setUpClass(cls): - cls.backend = "openslide" - - -@skipUnless(has_tiff, "Requires TiffFile") -class TestTiffFile(WSIReaderTests.Tests): - @classmethod - def setUpClass(cls): - cls.backend = "tifffile" - - if __name__ == "__main__": unittest.main() From 124477ece212e1a2ce93a59ef0f2fb494dfc8f6e Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 19 Apr 2022 22:28:02 +0000 Subject: [PATCH 26/26] Rename test files Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/min_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/min_tests.py b/tests/min_tests.py index 074a4b6861..25acbccb41 100644 --- a/tests/min_tests.py +++ b/tests/min_tests.py @@ -157,7 +157,7 @@ def run_testsuit(): "test_vitautoenc", "test_write_metrics_reports", "test_wsireader", - "test_wsireader_deprecated", + "test_wsireader_new", "test_zoom", "test_zoom_affine", "test_zoomd",