From c396bffca7017566e33f2be5996957ab84e4a0f4 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Thu, 2 Dec 2021 17:34:42 +0000 Subject: [PATCH 1/2] Update TiffFile backend to read only the entire image Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/image_reader.py | 56 +++++++++++++++++++++----------------- tests/test_wsireader.py | 31 ++++++++++++--------- 2 files changed, 49 insertions(+), 38 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index a620e06216..12617548ed 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -768,10 +768,8 @@ def get_data( # Verify inputs if level is None: level = self._check_level(img, level) - if size is None: - size = self._get_image_size(img, size, level, location) - # Extract patch (or the whole image) + # Extract a region or the entire image region = self._extract_region(img, location=location, size=size, level=level, dtype=dtype) # Add necessary metadata @@ -810,49 +808,57 @@ def _check_level(self, img, level): return level def _get_image_size(self, img, size, level, location): + """ + Calculate the maximum region size for the given level and starting location (if size is None). + Note that region size in OpenSlide and cuCIM are WxH (but the final image output would be HxW) + """ + if size is not None: + return size[::-1] + max_size = [] downsampling_factor = [] if self.backend == "openslide": downsampling_factor = img.level_downsamples[level] - max_size = img.level_dimensions[level][::-1] + max_size = img.level_dimensions[level] elif self.backend == "cucim": downsampling_factor = img.resolutions["level_downsamples"][level] - max_size = img.resolutions["level_dimensions"][level][::-1] - elif self.backend == "tifffile": - level0_size = img.pages[0].shape[:2] - max_size = img.pages[level].shape[:2] - downsampling_factor = np.mean([level0_size[i] / max_size[i] for i in range(len(max_size))]) + max_size = img.resolutions["level_dimensions"][level] - # subtract the top left corner of the patch from maximum size - level_location = [round(location[i] / downsampling_factor) for i in range(len(location))] - size = [max_size[i] - level_location[i] for i in range(len(max_size))] + # subtract the top left corner of the patch (at given level) from maximum size + location_at_level = (round(location[1] / downsampling_factor), round(location[0] / downsampling_factor)) + size = [max_size[i] - location_at_level[i] for i in range(len(max_size))] return size def _extract_region( self, img_obj, - size: Tuple[int, int], + size: Optional[Tuple[int, int]], location: Tuple[int, int] = (0, 0), level: int = 0, dtype: DtypeLike = np.uint8, ): if self.backend == "tifffile": - # with img_obj: - region = img_obj.asarray(level=level) - if level != 0: - level0_size = img_obj.pages[0].shape[:2] - max_size = img_obj.pages[level].shape[:2] - location = ( - int(location[0] / level0_size[0] * max_size[0]), - int(location[1] / level0_size[1] * max_size[1]), + # Read the entire image + if size is not None: + raise ValueError( + f"TiffFile backend reads the entire image only, so size '{size}'' should not be provided!", + "For more flexibility or extracting regions, please use cuCIM or OpenSlide backend.", + ) + if location != (0, 0): + raise ValueError( + f"TiffFile backend reads the entire image only, so location '{location}' should not be provided!", + "For more flexibility and extracting regions, please use cuCIM or OpenSlide backend.", ) - region = region[location[0] : location[0] + size[0], location[1] : location[1] + size[1]] + region = img_obj.asarray(level=level) else: + # Extract a region (or the entire image) + if size is None: + region_size = self._get_image_size(img_obj, size, level, location) + else: + region_size = size[::-1] # reverse the order of dimensions for size and location to become WxH - location = location[::-1] - size = size[::-1] - region = img_obj.read_region(location=location, size=size, level=level) + region = img_obj.read_region(location=location[::-1], size=region_size, level=level) region = self.convert_to_rgb_array(region, dtype) return region diff --git a/tests/test_wsireader.py b/tests/test_wsireader.py index e47a22908a..6ec1ace20d 100644 --- a/tests/test_wsireader.py +++ b/tests/test_wsireader.py @@ -119,26 +119,31 @@ def test_read_whole_image(self, file_path, level, expected_shape): @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_5]) def test_read_region(self, file_path, patch_info, expected_img): - # Due to CPU memory limitation ignore tifffile at level 0. - if self.backend == "tifffile" and patch_info["level"] == 0: - return reader = WSIReader(self.backend) - # Read twice to check multiple calls with reader.read(file_path) as img_obj: - img = reader.get_data(img_obj, **patch_info)[0] - img2 = reader.get_data(img_obj, **patch_info)[0] - self.assertTupleEqual(img.shape, img2.shape) - self.assertIsNone(assert_array_equal(img, img2)) - self.assertTupleEqual(img.shape, expected_img.shape) - self.assertIsNone(assert_array_equal(img, expected_img)) + if self.backend == "tifffile": + with self.assertRaises(ValueError): + reader.get_data(img_obj, **patch_info)[0] + else: + # Read twice to check multiple calls + img = reader.get_data(img_obj, **patch_info)[0] + img2 = reader.get_data(img_obj, **patch_info)[0] + self.assertTupleEqual(img.shape, img2.shape) + self.assertIsNone(assert_array_equal(img, img2)) + self.assertTupleEqual(img.shape, expected_img.shape) + self.assertIsNone(assert_array_equal(img, expected_img)) @parameterized.expand([TEST_CASE_3, TEST_CASE_4]) def test_read_patches(self, file_path, patch_info, expected_img): reader = WSIReader(self.backend) with reader.read(file_path) as img_obj: - img = reader.get_data(img_obj, **patch_info)[0] - self.assertTupleEqual(img.shape, expected_img.shape) - self.assertIsNone(assert_array_equal(img, expected_img)) + if self.backend == "tifffile": + with self.assertRaises(ValueError): + reader.get_data(img_obj, **patch_info)[0] + else: + img = reader.get_data(img_obj, **patch_info)[0] + self.assertTupleEqual(img.shape, expected_img.shape) + self.assertIsNone(assert_array_equal(img, expected_img)) @parameterized.expand([TEST_CASE_RGB_0, TEST_CASE_RGB_1]) @skipUnless(has_tiff, "Requires tifffile.") From a0143f81a9c45cd8de1c711085c430250e717bf6 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Thu, 2 Dec 2021 21:09:32 +0000 Subject: [PATCH 2/2] Remove redundant check for size Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/image_reader.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index 12617548ed..15717d090b 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -852,13 +852,12 @@ def _extract_region( ) region = img_obj.asarray(level=level) else: + # Get region size to be extracted + region_size = self._get_image_size(img_obj, size, level, location) + # reverse the order of location's dimensions to become WxH (for cuCIM and OpenSlide) + region_location = location[::-1] # Extract a region (or the entire image) - if size is None: - region_size = self._get_image_size(img_obj, size, level, location) - else: - region_size = size[::-1] - # reverse the order of dimensions for size and location to become WxH - region = img_obj.read_region(location=location[::-1], size=region_size, level=level) + region = img_obj.read_region(location=region_location, size=region_size, level=level) region = self.convert_to_rgb_array(region, dtype) return region