From c80e0cdb3fcdb6533f7f1ede55ba5c186329f4b6 Mon Sep 17 00:00:00 2001 From: Yiheng Wang Date: Thu, 17 Mar 2022 18:18:04 +0800 Subject: [PATCH] update cachedataset num workers Signed-off-by: Yiheng Wang --- monai/apps/datasets.py | 10 ++++++---- monai/apps/pathology/data/datasets.py | 6 ++++-- monai/data/dataset.py | 11 +++++++---- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/monai/apps/datasets.py b/monai/apps/datasets.py index 922289a2c4..1bfb97abd9 100644 --- a/monai/apps/datasets.py +++ b/monai/apps/datasets.py @@ -50,7 +50,8 @@ class MedNISTDataset(Randomizable, CacheDataset): cache_rate: percentage of cached data in total, default is 1.0 (cache all). will take the minimum of (cache_num, data_length x cache_rate, data_length). num_workers: the number of worker threads to use. - if 0 a single thread will be used. Default is 0. + If num_workers is None then the number returned by os.cpu_count() is used. + If a value less than 1 is specified, 1 will be used instead. progress: whether to display a progress bar when downloading dataset and computing the transform cache content. copy_cache: whether to `deepcopy` the cache content before applying the random transforms, default to `True`. if the random transforms don't modify the cached content @@ -82,7 +83,7 @@ def __init__( test_frac: float = 0.1, cache_num: int = sys.maxsize, cache_rate: float = 1.0, - num_workers: int = 0, + num_workers: Optional[int] = 1, progress: bool = True, copy_cache: bool = True, as_contiguous: bool = True, @@ -202,7 +203,8 @@ class DecathlonDataset(Randomizable, CacheDataset): cache_rate: percentage of cached data in total, default is 1.0 (cache all). will take the minimum of (cache_num, data_length x cache_rate, data_length). num_workers: the number of worker threads to use. - if 0 a single thread will be used. Default is 0. + If num_workers is None then the number returned by os.cpu_count() is used. + If a value less than 1 is specified, 1 will be used instead. 
progress: whether to display a progress bar when downloading dataset and computing the transform cache content. copy_cache: whether to `deepcopy` the cache content before applying the random transforms, default to `True`. if the random transforms don't modify the cached content @@ -274,7 +276,7 @@ def __init__( val_frac: float = 0.2, cache_num: int = sys.maxsize, cache_rate: float = 1.0, - num_workers: int = 0, + num_workers: int = 1, progress: bool = True, copy_cache: bool = True, as_contiguous: bool = True, diff --git a/monai/apps/pathology/data/datasets.py b/monai/apps/pathology/data/datasets.py index 7a44e3f6fc..77e3bb34c4 100644 --- a/monai/apps/pathology/data/datasets.py +++ b/monai/apps/pathology/data/datasets.py @@ -119,8 +119,10 @@ class SmartCachePatchWSIDataset(SmartCacheDataset): will take the minimum of (cache_num, data_length x cache_rate, data_length). num_init_workers: the number of worker threads to initialize the cache for first epoch. If num_init_workers is None then the number returned by os.cpu_count() is used. + If a value less than 1 is specified, 1 will be used instead. num_replace_workers: the number of worker threads to prepare the replacement cache for every epoch. If num_replace_workers is None then the number returned by os.cpu_count() is used. + If a value less than 1 is specified, 1 will be used instead. progress: whether to display a progress bar when caching for the first epoch. copy_cache: whether to `deepcopy` the cache content before applying the random transforms, default to `True`. 
if the random transforms don't modify the cache content @@ -142,8 +144,8 @@ def __init__( replace_rate: float = 0.5, cache_num: int = sys.maxsize, cache_rate: float = 1.0, - num_init_workers: Optional[int] = None, - num_replace_workers: Optional[int] = None, + num_init_workers: Optional[int] = 1, + num_replace_workers: Optional[int] = 1, progress: bool = True, copy_cache: bool = True, as_contiguous: bool = True, diff --git a/monai/data/dataset.py b/monai/data/dataset.py index adcfad360f..3c1fc0abed 100644 --- a/monai/data/dataset.py +++ b/monai/data/dataset.py @@ -672,7 +672,7 @@ def __init__( transform: Union[Sequence[Callable], Callable], cache_num: int = sys.maxsize, cache_rate: float = 1.0, - num_workers: Optional[int] = None, + num_workers: Optional[int] = 1, progress: bool = True, copy_cache: bool = True, as_contiguous: bool = True, @@ -687,8 +687,9 @@ def __init__( will take the minimum of (cache_num, data_length x cache_rate, data_length). cache_rate: percentage of cached data in total, default is 1.0 (cache all). will take the minimum of (cache_num, data_length x cache_rate, data_length). - num_workers: the number of worker processes to use. + num_workers: the number of worker threads to use. If num_workers is None then the number returned by os.cpu_count() is used. + If a value less than 1 is specified, 1 will be used instead. progress: whether to display a progress bar. copy_cache: whether to `deepcopy` the cache content before applying the random transforms, default to `True`. if the random transforms don't modify the cached content @@ -862,8 +863,10 @@ class SmartCacheDataset(Randomizable, CacheDataset): will take the minimum of (cache_num, data_length x cache_rate, data_length). num_init_workers: the number of worker threads to initialize the cache for first epoch. If num_init_workers is None then the number returned by os.cpu_count() is used. + If a value less than 1 is specified, 1 will be used instead. 
num_replace_workers: the number of worker threads to prepare the replacement cache for every epoch. If num_replace_workers is None then the number returned by os.cpu_count() is used. + If a value less than 1 is specified, 1 will be used instead. progress: whether to display a progress bar when caching for the first epoch. shuffle: whether to shuffle the whole data list before preparing the cache content for first epoch. it will not modify the original input data sequence in-place. @@ -884,8 +887,8 @@ def __init__( replace_rate: float, cache_num: int = sys.maxsize, cache_rate: float = 1.0, - num_init_workers: Optional[int] = None, - num_replace_workers: Optional[int] = None, + num_init_workers: Optional[int] = 1, + num_replace_workers: Optional[int] = 1, progress: bool = True, shuffle: bool = True, seed: int = 0,