From f6716e2ff295f5b981e3a833b817f203681a21e1 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 27 Apr 2022 18:12:07 +0000 Subject: [PATCH 1/7] Make all transforms optional Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/dataset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/monai/data/dataset.py b/monai/data/dataset.py index 3c1fc0abed..cff93761ad 100644 --- a/monai/data/dataset.py +++ b/monai/data/dataset.py @@ -200,8 +200,8 @@ class PersistentDataset(Dataset): def __init__( self, data: Sequence, - transform: Union[Sequence[Callable], Callable], cache_dir: Optional[Union[Path, str]], + transform: Optional[Union[Sequence[Callable], Callable]] = None, hash_func: Callable[..., bytes] = pickle_hashing, pickle_module: str = "pickle", pickle_protocol: int = DEFAULT_PROTOCOL, @@ -374,9 +374,9 @@ class CacheNTransDataset(PersistentDataset): def __init__( self, data: Sequence, - transform: Union[Sequence[Callable], Callable], cache_n_trans: int, cache_dir: Optional[Union[Path, str]], + transform: Optional[Union[Sequence[Callable], Callable]] = None, hash_func: Callable[..., bytes] = pickle_hashing, pickle_module: str = "pickle", pickle_protocol: int = DEFAULT_PROTOCOL, @@ -476,7 +476,7 @@ class LMDBDataset(PersistentDataset): def __init__( self, data: Sequence, - transform: Union[Sequence[Callable], Callable], + transform: Optional[Union[Sequence[Callable], Callable]] = None, cache_dir: Union[Path, str] = "cache", hash_func: Callable[..., bytes] = pickle_hashing, db_name: str = "monai_cache", @@ -669,7 +669,7 @@ class CacheDataset(Dataset): def __init__( self, data: Sequence, - transform: Union[Sequence[Callable], Callable], + transform: Optional[Union[Sequence[Callable], Callable]] = None, cache_num: int = sys.maxsize, cache_rate: float = 1.0, num_workers: Optional[int] = 1, @@ -883,8 +883,8 @@ class SmartCacheDataset(Randomizable, CacheDataset): def __init__( self, 
data: Sequence, - transform: Union[Sequence[Callable], Callable], replace_rate: float, + transform: Optional[Union[Sequence[Callable], Callable]] = None, cache_num: int = sys.maxsize, cache_rate: float = 1.0, num_init_workers: Optional[int] = 1, From f21fe5a323206b0010c4bf6cf377a35d9a36e59e Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Thu, 28 Apr 2022 14:48:50 +0000 Subject: [PATCH 2/7] Update wsireader tests Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_wsireader.py | 28 +++++++++------------------- tests/test_wsireader_new.py | 16 ++++++++++------ 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/tests/test_wsireader.py b/tests/test_wsireader.py index 3655100dab..5d092c4ce5 100644 --- a/tests/test_wsireader.py +++ b/tests/test_wsireader.py @@ -84,7 +84,9 @@ TEST_CASE_RGB_1 = [np.ones((3, 100, 100), dtype=np.uint8)] # CHW -TEST_CASE_ERROR_GRAY = [np.ones((16, 16, 2), dtype=np.uint8)] # wrong color channel +TEST_CASE_ERROR_0C = [np.ones((16, 16), dtype=np.uint8)] # no color channel +TEST_CASE_ERROR_1C = [np.ones((16, 16, 1), dtype=np.uint8)] # one color channel +TEST_CASE_ERROR_2C = [np.ones((16, 16, 2), dtype=np.uint8)] # two color channels TEST_CASE_ERROR_3D = [np.ones((16, 16, 16, 3), dtype=np.uint8)] # 3D + color @@ -106,20 +108,6 @@ def save_rgba_tiff(array: np.ndarray, filename: str, mode: str): return filename -def save_gray_tiff(array: np.ndarray, filename: str): - """ - Save numpy array into a TIFF file - - Args: - array: numpy ndarray with any shape - filename: the filename to be used for the tiff file. 
- """ - img_gray = array - imwrite(filename, img_gray, shape=img_gray.shape, photometric="minisblack") - - return filename - - @skipUnless(has_cucim or has_osl or has_tiff, "Requires cucim, openslide, or tifffile!") def setUpModule(): # noqa: N802 hash_type = testing_data_config("images", FILE_KEY, "hash_type") @@ -187,13 +175,15 @@ def test_read_rgba(self, img_expected): self.assertIsNone(assert_array_equal(image["RGB"], img_expected)) self.assertIsNone(assert_array_equal(image["RGBA"], img_expected)) - @parameterized.expand([TEST_CASE_ERROR_GRAY, TEST_CASE_ERROR_3D]) + @parameterized.expand([TEST_CASE_ERROR_0C, TEST_CASE_ERROR_1C, TEST_CASE_ERROR_2C, TEST_CASE_ERROR_3D]) @skipUnless(has_tiff, "Requires tifffile.") def test_read_malformats(self, img_expected): + if self.backend == "cucim" and (len(img_expected.shape) < 3 or img_expected.shape[2] == 1): + # Until cuCIM addresses https://github.com/rapidsai/cucim/issues/230 + return reader = WSIReader(self.backend) - file_path = save_gray_tiff( - img_expected, os.path.join(os.path.dirname(__file__), "testing_data", "temp_tiff_image_gray.tiff") - ) + file_path = os.path.join(os.path.dirname(__file__), "testing_data", "temp_tiff_image_gray.tiff") + imwrite(file_path, img_expected, shape=img_expected.shape) with self.assertRaises((RuntimeError, ValueError, openslide.OpenSlideError if has_osl else ValueError)): with reader.read(file_path) as img_obj: reader.get_data(img_obj) diff --git a/tests/test_wsireader_new.py b/tests/test_wsireader_new.py index 63d61dfeb3..2ac4125f97 100644 --- a/tests/test_wsireader_new.py +++ b/tests/test_wsireader_new.py @@ -72,7 +72,9 @@ TEST_CASE_RGB_1 = [np.ones((3, 100, 100), dtype=np.uint8)] # CHW -TEST_CASE_ERROR_GRAY = [np.ones((16, 16, 2), dtype=np.uint8)] # wrong color channel +TEST_CASE_ERROR_0C = [np.ones((16, 16), dtype=np.uint8)] # no color channel +TEST_CASE_ERROR_1C = [np.ones((16, 16, 1), dtype=np.uint8)] # one color channel +TEST_CASE_ERROR_2C = [np.ones((16, 16, 2), 
dtype=np.uint8)] # two color channels TEST_CASE_ERROR_3D = [np.ones((16, 16, 16, 3), dtype=np.uint8)] # 3D + color @@ -103,7 +105,7 @@ def save_gray_tiff(array: np.ndarray, filename: str): filename: the filename to be used for the tiff file. """ img_gray = array - imwrite(filename, img_gray, shape=img_gray.shape, photometric="minisblack") + imwrite(filename, img_gray, shape=img_gray.shape) return filename @@ -180,13 +182,15 @@ def test_read_rgba(self, img_expected): self.assertIsNone(assert_array_equal(image["RGB"], img_expected)) self.assertIsNone(assert_array_equal(image["RGBA"], img_expected)) - @parameterized.expand([TEST_CASE_ERROR_GRAY, TEST_CASE_ERROR_3D]) + @parameterized.expand([TEST_CASE_ERROR_0C, TEST_CASE_ERROR_1C, TEST_CASE_ERROR_2C, TEST_CASE_ERROR_3D]) @skipUnless(has_tiff, "Requires tifffile.") def test_read_malformats(self, img_expected): + if self.backend == "cucim" and (len(img_expected.shape) < 3 or img_expected.shape[2] == 1): + # Until cuCIM addresses https://github.com/rapidsai/cucim/issues/230 + return reader = WSIReader(self.backend) - file_path = save_gray_tiff( - img_expected, os.path.join(os.path.dirname(__file__), "testing_data", "temp_tiff_image_gray.tiff") - ) + file_path = os.path.join(os.path.dirname(__file__), "testing_data", "temp_tiff_image_gray.tiff") + imwrite(file_path, img_expected, shape=img_expected.shape) with self.assertRaises((RuntimeError, ValueError, openslide.OpenSlideError if has_osl else ValueError)): with reader.read(file_path) as img_obj: reader.get_data(img_obj) From 1610bbc65bfcf0f3d832c148b3d69ecd4019fa25 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Thu, 28 Apr 2022 14:59:43 +0000 Subject: [PATCH 3/7] Remove optional from PersistentDataset and its derivatives Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/monai/data/dataset.py b/monai/data/dataset.py index 
cff93761ad..c8b847bc36 100644 --- a/monai/data/dataset.py +++ b/monai/data/dataset.py @@ -200,8 +200,8 @@ class PersistentDataset(Dataset): def __init__( self, data: Sequence, + transform: Union[Sequence[Callable], Callable], cache_dir: Optional[Union[Path, str]], - transform: Optional[Union[Sequence[Callable], Callable]] = None, hash_func: Callable[..., bytes] = pickle_hashing, pickle_module: str = "pickle", pickle_protocol: int = DEFAULT_PROTOCOL, @@ -374,9 +374,9 @@ class CacheNTransDataset(PersistentDataset): def __init__( self, data: Sequence, + transform: Union[Sequence[Callable], Callable], cache_n_trans: int, cache_dir: Optional[Union[Path, str]], - transform: Optional[Union[Sequence[Callable], Callable]] = None, hash_func: Callable[..., bytes] = pickle_hashing, pickle_module: str = "pickle", pickle_protocol: int = DEFAULT_PROTOCOL, @@ -476,7 +476,7 @@ class LMDBDataset(PersistentDataset): def __init__( self, data: Sequence, - transform: Optional[Union[Sequence[Callable], Callable]] = None, + transform: Union[Sequence[Callable], Callable], cache_dir: Union[Path, str] = "cache", hash_func: Callable[..., bytes] = pickle_hashing, db_name: str = "monai_cache", From 3d9516d38a417197b5b282ec86dd6887baeabfbe Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Thu, 28 Apr 2022 15:26:43 +0000 Subject: [PATCH 4/7] Add unittests for cache without transform Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_cachedataset.py | 6 ++++++ tests/test_smartcachedataset.py | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/tests/test_cachedataset.py b/tests/test_cachedataset.py index 4b77d4a55a..4fa1b5ea69 100644 --- a/tests/test_cachedataset.py +++ b/tests/test_cachedataset.py @@ -55,6 +55,12 @@ def test_shape(self, transform, expected_shape): data4 = dataset[-1] self.assertEqual(len(data3), 1) + if transform is None: + # Check without providing transform + dataset2 = CacheDataset(data=test_data, 
cache_rate=0.5, as_contiguous=True) + for k in ["image", "label", "extra"]: + self.assertEqual(dataset[0][k], dataset2[0][k]) + if transform is None: self.assertEqual(data1["image"], os.path.join(tempdir, "image1.nii.gz")) self.assertEqual(data2["label"], os.path.join(tempdir, "label2.nii.gz")) diff --git a/tests/test_smartcachedataset.py b/tests/test_smartcachedataset.py index e7d51be63a..6eca6113f0 100644 --- a/tests/test_smartcachedataset.py +++ b/tests/test_smartcachedataset.py @@ -56,6 +56,17 @@ def test_shape(self, replace_rate, num_replace_workers, transform): num_init_workers=4, num_replace_workers=num_replace_workers, ) + if transform is None: + # Check without providing transform + dataset2 = SmartCacheDataset( + data=test_data, + replace_rate=replace_rate, + cache_num=16, + num_init_workers=4, + num_replace_workers=num_replace_workers, + ) + for k in ["image", "label", "extra"]: + self.assertEqual(dataset[0][k], dataset2[0][k]) self.assertEqual(len(dataset._cache), dataset.cache_num) for i in range(dataset.cache_num): From a9f24c5d04e0a2ffcb9be563754e312f23df30b8 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Thu, 28 Apr 2022 17:14:27 +0000 Subject: [PATCH 5/7] Add default replace_rate Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/data/dataset.py b/monai/data/dataset.py index c8b847bc36..42ac3b8f99 100644 --- a/monai/data/dataset.py +++ b/monai/data/dataset.py @@ -669,7 +669,7 @@ class CacheDataset(Dataset): def __init__( self, data: Sequence, - transform: Optional[Union[Sequence[Callable], Callable]] = None, + transform: Optional[Union[Sequence[Callable], Callable]], cache_num: int = sys.maxsize, cache_rate: float = 1.0, num_workers: Optional[int] = 1, @@ -883,8 +883,8 @@ class SmartCacheDataset(Randomizable, CacheDataset): def __init__( self, data: Sequence, - replace_rate: float, transform: 
Optional[Union[Sequence[Callable], Callable]] = None, + replace_rate: float = 0.5, cache_num: int = sys.maxsize, cache_rate: float = 1.0, num_init_workers: Optional[int] = 1, From 0d6450b0fb4bc6920a241b8f743a44209db78aa1 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Thu, 28 Apr 2022 19:54:50 +0000 Subject: [PATCH 6/7] Add default value Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/data/dataset.py b/monai/data/dataset.py index 42ac3b8f99..54e8e9fdb1 100644 --- a/monai/data/dataset.py +++ b/monai/data/dataset.py @@ -669,7 +669,7 @@ class CacheDataset(Dataset): def __init__( self, data: Sequence, - transform: Optional[Union[Sequence[Callable], Callable]], + transform: Optional[Union[Sequence[Callable], Callable]] = None, cache_num: int = sys.maxsize, cache_rate: float = 1.0, num_workers: Optional[int] = 1, From 20c4882ae69c490cd841e3121a54cd1fb1e8e774 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 29 Apr 2022 12:10:56 +0000 Subject: [PATCH 7/7] Set default replace_rate to 0.1 Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/data/dataset.py b/monai/data/dataset.py index 54e8e9fdb1..29342742cb 100644 --- a/monai/data/dataset.py +++ b/monai/data/dataset.py @@ -856,7 +856,7 @@ class SmartCacheDataset(Randomizable, CacheDataset): Args: data: input data to load and transform to generate dataset for model. transform: transforms to execute operations on input data. - replace_rate: percentage of the cached items to be replaced in every epoch. + replace_rate: percentage of the cached items to be replaced in every epoch (default to 0.1). cache_num: number of items to be cached. Default is `sys.maxsize`. 
will take the minimum of (cache_num, data_length x cache_rate, data_length). cache_rate: percentage of cached data in total, default is 1.0 (cache all). @@ -884,7 +884,7 @@ def __init__( self, data: Sequence, transform: Optional[Union[Sequence[Callable], Callable]] = None, - replace_rate: float = 0.5, + replace_rate: float = 0.1, cache_num: int = sys.maxsize, cache_rate: float = 1.0, num_init_workers: Optional[int] = 1,