From b5dccbe020fb37d3f3dd146767f0ddfc20f68061 Mon Sep 17 00:00:00 2001 From: dongy Date: Thu, 4 May 2023 23:35:40 -0700 Subject: [PATCH 01/14] init Signed-off-by: dongy --- monai/apps/nnunet/nnunetv2_runner.py | 46 +++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index e0b57ffe1c..664ac949de 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -141,9 +141,12 @@ class nnUNetV2Runner: # noqa: N801 """ - def __init__(self, input_config: Any, work_dir: str = "work_dir") -> None: + def __init__( + self, input_config: Any, trainer_class_name: str = "nnUNetTrainer", work_dir: str = "work_dir" + ) -> None: self.input_info: dict = {} self.input_config_or_dict = input_config + self.trainer_class_name = trainer_class_name self.work_dir = work_dir if isinstance(self.input_config_or_dict, dict): @@ -470,7 +473,7 @@ def plan_and_process( if not no_pp: self.preprocess(c, n_proc, overwrite_plans_name, verbose) - def train_single_model(self, config: Any, fold: int, gpu_id: int = 0, **kwargs: Any) -> None: + def train_single_model(self, config: Any, fold: int, gpu_id: int | str | tuple = 0, **kwargs: Any) -> None: """ Run the training on a single GPU with one specified configuration provided. Note: this will override the environment variable `CUDA_VISIBLE_DEVICES`. @@ -478,7 +481,8 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int = 0, **kwargs: Args: config: configuration that should be trained. Examples: "2d", "3d_fullres", "3d_lowres". fold: fold of the 5-fold cross-validation. Should be an int between 0 and 4. - gpu_id: an integer to select the device to use. Default: 0. + gpu_id: an integer to select the device to use, or a str/tuple of device indices used for multi-GPU + training (e.g., "0,1"). Default: 0. from nnunetv2.run.run_training import run_training kwargs: this optional parameter allows you to specify additional arguments in ``nnunetv2.run.run_training.run_training``. Currently supported args are @@ -498,11 +502,32 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int = 0, **kwargs: - disable_checkpointing: True to disable checkpointing. Ideal for testing things out and you don't want to flood your hard drive with checkpoints. Default: False. """ - os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id}" + if isinstance(gpu_id, str): + gpu_id = tuple(map(int, gpu_id.replace('"', "").split(","))) + + if isinstance(gpu_id, tuple): + if len(gpu_id) > 1: + gpu_ids_str = "" + for _i in range(len(gpu_id)): + gpu_ids_str += f"{gpu_id[_i]}," + os.environ["CUDA_VISIBLE_DEVICES"] = gpu_ids_str[:-1] + else: + os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id[0]}" + else: + os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id}" from nnunetv2.run.run_training import run_training - run_training(dataset_name_or_id=self.dataset_name_or_id, configuration=config, fold=fold, **kwargs) + if isinstance(gpu_id, int): + run_training(dataset_name_or_id=self.dataset_name_or_id, configuration=config, fold=fold, **kwargs) + else: + run_training( + dataset_name_or_id=self.dataset_name_or_id, + configuration=config, + fold=fold, + num_gpus=len(gpu_id), + **kwargs, + ) def train( self, @@ -530,11 +555,18 @@ def train( device_ids = tuple(range(num_gpus)) logger.info(f"number of GPUs is {len(device_ids)}, device ids are {device_ids}") if len(device_ids) > 1: - self.train_parallel(configs=ensure_tuple(configs), device_ids=device_ids, **kwargs) + self.train_parallel( + configs=ensure_tuple(configs), + device_ids=device_ids, + trainer_class_name=self.trainer_class_name, + **kwargs, + ) else: for cfg in ensure_tuple(configs): for _fold in range(self.num_folds): - self.train_single_model(config=cfg, fold=_fold, **kwargs) + self.train_single_model( + config=cfg, fold=_fold, trainer_class_name=self.trainer_class_name, **kwargs + ) def train_parallel_cmd( self, From b60148cb358708906db05d424bbf18bbd6221c3d Mon Sep 17 00:00:00 2001 From: dongy Date: Fri, 5 May 2023 08:10:24 -0700 Subject: [PATCH 02/14] init Signed-off-by: dongy --- monai/apps/nnunet/nnunetv2_runner.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index 664ac949de..4d22ffd38c 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -473,7 +473,7 @@ def plan_and_process( if not no_pp: self.preprocess(c, n_proc, overwrite_plans_name, verbose) - def train_single_model(self, config: Any, fold: int, gpu_id: int | str | tuple = 0, **kwargs: Any) -> None: + def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, **kwargs: Any) -> None: """ Run the training on a single GPU with one specified configuration provided. Note: this will override the environment variable `CUDA_VISIBLE_DEVICES`. @@ -481,8 +481,8 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | str | tuple = Args: config: configuration that should be trained. Examples: "2d", "3d_fullres", "3d_lowres". fold: fold of the 5-fold cross-validation. Should be an int between 0 and 4. - gpu_id: an integer to select the device to use, or a str/tuple of device indices used for multi-GPU - training (e.g., "0,1"). Default: 0. + gpu_id: an integer to select the device to use, or a tuple of GPU device indices used for multi-GPU + training (e.g., (0,1)). Default: 0. from nnunetv2.run.run_training import run_training kwargs: this optional parameter allows you to specify additional arguments in ``nnunetv2.run.run_training.run_training``. Currently supported args are @@ -660,6 +660,7 @@ def train_parallel( if not stage[device_id]: continue cmd_str = "; ".join(stage[device_id]) + logger.info(f"\ncurrent command:\n{cmd_str}") processes.append(subprocess.Popen(cmd_str, shell=True, stdout=subprocess.DEVNULL)) # finish this stage first for p in processes: From f57be402f9bb4e91dba162c4048e8e653c558186 Mon Sep 17 00:00:00 2001 From: dongy Date: Fri, 5 May 2023 11:31:11 -0700 Subject: [PATCH 03/14] update Signed-off-by: dongy --- monai/apps/nnunet/nnunetv2_runner.py | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index 4d22ffd38c..ef060e64b4 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -64,6 +64,9 @@ class nnUNetV2Runner: # noqa: N801 - ``"nnUNet_trained_models"`` - ``"dataset_name_or_id"``: Name or Integer ID of the dataset If an optional key is not specified, then the pipeline will use the default values. + trainer_class_name: the trainer class names offered by nnUNetV2 exhibit variations in training duration. + Default: "nnUNetTrainer". Other options: "nnUNetTrainer_Xepoch". X could be one of 1,5,10,20,50,100, + 250,2000,4000,8000. work_dir: working directory to save the intermediate and final results. Examples: @@ -502,9 +505,6 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, ** - disable_checkpointing: True to disable checkpointing. Ideal for testing things out and you don't want to flood your hard drive with checkpoints. Default: False. """ - if isinstance(gpu_id, str): - gpu_id = tuple(map(int, gpu_id.replace('"', "").split(","))) - if isinstance(gpu_id, tuple): if len(gpu_id) > 1: gpu_ids_str = "" @@ -519,13 +519,20 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, ** from nnunetv2.run.run_training import run_training if isinstance(gpu_id, int): - run_training(dataset_name_or_id=self.dataset_name_or_id, configuration=config, fold=fold, **kwargs) + run_training( + dataset_name_or_id=self.dataset_name_or_id, + configuration=config, + fold=fold, + trainer_class_name=self.trainer_class_name, + **kwargs, + ) else: run_training( dataset_name_or_id=self.dataset_name_or_id, configuration=config, fold=fold, num_gpus=len(gpu_id), + trainer_class_name=self.trainer_class_name, **kwargs, ) @@ -547,7 +554,6 @@ def train( kwargs: this optional parameter allows you to specify additional arguments defined in the ``train_single_model`` method. """ - if device_ids is None: result = subprocess.run(["nvidia-smi", "--list-gpus"], stdout=subprocess.PIPE) output = result.stdout.decode("utf-8") @@ -555,18 +561,11 @@ def train( device_ids = tuple(range(num_gpus)) logger.info(f"number of GPUs is {len(device_ids)}, device ids are {device_ids}") if len(device_ids) > 1: - self.train_parallel( - configs=ensure_tuple(configs), - device_ids=device_ids, - trainer_class_name=self.trainer_class_name, - **kwargs, - ) + self.train_parallel(configs=ensure_tuple(configs), device_ids=device_ids, **kwargs) else: for cfg in ensure_tuple(configs): for _fold in range(self.num_folds): - self.train_single_model( - config=cfg, fold=_fold, trainer_class_name=self.trainer_class_name, **kwargs - ) + self.train_single_model(config=cfg, fold=_fold, **kwargs) def train_parallel_cmd( self, @@ -618,7 +617,8 @@ def train_parallel_cmd( cmd = ( "python -m monai.apps.nnunet nnUNetV2Runner train_single_model " + f"--input_config '{self.input_config_or_dict}' --work_dir '{self.work_dir}' " - + f"--config '{_config}' --fold {_i} --gpu_id {the_device}" + + f"--config '{_config}' --fold {_i} --gpu_id {the_device} " + + f"--trainer_class_name {self.trainer_class_name}" ) for _key, _value in kwargs.items(): cmd += f" --{_key} {_value}" @@ -660,7 +660,7 @@ def train_parallel( if not stage[device_id]: continue cmd_str = "; ".join(stage[device_id]) - logger.info(f"\ncurrent command:\n{cmd_str}") + logger.info(f"Current running command on GPU device {device_id}:\n{cmd_str}\n") processes.append(subprocess.Popen(cmd_str, shell=True, stdout=subprocess.DEVNULL)) # finish this stage first for p in processes: From f62405f1d0869731b1ff121bbeae67675f322e93 Mon Sep 17 00:00:00 2001 From: dongy Date: Fri, 5 May 2023 11:41:03 -0700 Subject: [PATCH 04/14] update Signed-off-by: dongy --- monai/apps/nnunet/nnunetv2_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index ef060e64b4..f70430a42d 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -650,7 +650,7 @@ def train_parallel( if not gpu_cmd: continue logger.info( - f"\ntraining - stage {s + 1}:\n" + f"training - stage {s + 1}:\n" f"for gpu {gpu_id}, commands: {gpu_cmd}\n" f"log '.txt' inside '{os.path.join(self.nnunet_results, self.dataset_name)}'" ) From e0df797ec5dd4c3bbba3fef1dc554ed78d2d0c3f Mon Sep 17 00:00:00 2001 From: dongy Date: Fri, 5 May 2023 23:31:46 -0700 Subject: [PATCH 05/14] update Signed-off-by: dongy --- monai/apps/nnunet/nnunetv2_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index f70430a42d..d20d306b9a 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -518,7 +518,7 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, ** from nnunetv2.run.run_training import run_training - if isinstance(gpu_id, int): + if isinstance(gpu_id, int) or len(gpu_id) == 1: run_training( dataset_name_or_id=self.dataset_name_or_id, configuration=config, @@ -565,7 +565,7 @@ def train( else: for cfg in ensure_tuple(configs): for _fold in range(self.num_folds): - self.train_single_model(config=cfg, fold=_fold, **kwargs) + self.train_single_model(config=cfg, fold=_fold, gpu_id=device_ids, **kwargs) def train_parallel_cmd( self, From 218a8f61f76079043de6e7381144461914df9fd9 Mon Sep 17 00:00:00 2001 From: dongy Date: Fri, 5 May 2023 23:36:49 -0700 Subject: [PATCH 06/14] update Signed-off-by: dongy --- monai/apps/nnunet/nnunetv2_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index d20d306b9a..a6293114d0 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -570,7 +570,7 @@ def train( def train_parallel_cmd( self, configs: tuple | str = (M.N_3D_FULLRES, M.N_2D, M.N_3D_LOWRES, M.N_3D_CASCADE_FULLRES), - device_ids: tuple | None = None, + device_ids: tuple | list | None = None, **kwargs: Any, ) -> list: """ @@ -579,7 +579,7 @@ def train_parallel_cmd( Args: configs: configurations that should be trained. Default: ("2d", "3d_fullres", "3d_lowres", "3d_cascade_fullres"). - device_ids: a tuple of GPU device IDs to use for the training. Default: None (all available GPUs). + device_ids: a tuple/list of GPU device IDs to use for the training. Default: None (all available GPUs). kwargs: this optional parameter allows you to specify additional arguments defined in the ``train_single_model`` method. """ From badcb56db81ce94695b6b31a52324afd29871650 Mon Sep 17 00:00:00 2001 From: dongy Date: Fri, 5 May 2023 23:43:57 -0700 Subject: [PATCH 07/14] update Signed-off-by: dongy --- monai/apps/nnunet/nnunetv2_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index a6293114d0..bdea96c194 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -712,9 +712,9 @@ def find_best_configuration( plans: list of plan identifiers. Default: nnUNetPlans. configs: list of configurations. Default: ["2d", "3d_fullres", "3d_lowres", "3d_cascade_fullres"]. trainers: list of trainers. Default: nnUNetTrainer. - allow_ensembling: Set this flag to enable ensembling. + allow_ensembling: set this flag to enable ensembling. num_processes: number of processes to use for ensembling, postprocessing, etc. - overwrite: If set we will overwrite already ensembled files etc. May speed up consecutive + overwrite: if set we will overwrite already ensembled files etc. May speed up consecutive runs of this command (not recommended) at the risk of not updating outdated results. folds: folds to use. Default: (0, 1, 2, 3, 4). strict: a switch that triggers RunTimeError if the logging folder cannot be found. Default: False. From facb12f5ada769bc83053bef62f0d5a40b7d19ec Mon Sep 17 00:00:00 2001 From: dongy Date: Sat, 6 May 2023 00:20:46 -0700 Subject: [PATCH 08/14] update Signed-off-by: dongy --- monai/apps/nnunet/nnunetv2_runner.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index bdea96c194..92e118e007 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -67,6 +67,9 @@ class nnUNetV2Runner: # noqa: N801 trainer_class_name: the trainer class names offered by nnUNetV2 exhibit variations in training duration. Default: "nnUNetTrainer". Other options: "nnUNetTrainer_Xepoch". X could be one of 1,5,10,20,50,100, 250,2000,4000,8000. + export_validation_probabilities: True to save softmax predictions from final validation as npz + files (in addition to predicted segmentations). Needed for finding the best ensemble. + Default: True. work_dir: working directory to save the intermediate and final results. Examples: @@ -145,11 +148,16 @@ class nnUNetV2Runner: # noqa: N801 """ def __init__( - self, input_config: Any, trainer_class_name: str = "nnUNetTrainer", work_dir: str = "work_dir" + self, + input_config: Any, + trainer_class_name: str = "nnUNetTrainer", + work_dir: str = "work_dir", + export_validation_probabilities: bool = True, ) -> None: self.input_info: dict = {} self.input_config_or_dict = input_config self.trainer_class_name = trainer_class_name + self.export_validation_probabilities = export_validation_probabilities self.work_dir = work_dir if isinstance(self.input_config_or_dict, dict): @@ -496,9 +504,6 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, ** - use_compressed_data: True to use compressed data for training. Reading compressed data is much more CPU and (potentially) RAM intensive and should only be used if you know what you are doing. Default: False. - - export_validation_probabilities: True to save softmax predictions from final validation as npz - files (in addition to predicted segmentations). Needed for finding the best ensemble. - Default: False. - continue_training: continue training from latest checkpoint. Default: False. - only_run_validation: True to run the validation only. Requires training to have finished. Default: False. @@ -524,6 +529,7 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, ** configuration=config, fold=fold, trainer_class_name=self.trainer_class_name, + export_validation_probabilities=self.export_validation_probabilities, **kwargs, ) else: @@ -533,6 +539,7 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, ** fold=fold, num_gpus=len(gpu_id), trainer_class_name=self.trainer_class_name, + export_validation_probabilities=self.export_validation_probabilities, **kwargs, ) @@ -618,7 +625,8 @@ def train_parallel_cmd( "python -m monai.apps.nnunet nnUNetV2Runner train_single_model " + f"--input_config '{self.input_config_or_dict}' --work_dir '{self.work_dir}' " + f"--config '{_config}' --fold {_i} --gpu_id {the_device} " - + f"--trainer_class_name {self.trainer_class_name}" + + f"--trainer_class_name {self.trainer_class_name} " + + f"--export_validation_probabilities {self.export_validation_probabilities}" ) for _key, _value in kwargs.items(): cmd += f" --{_key} {_value}" @@ -698,7 +706,7 @@ def find_best_configuration( self, plans: tuple | str = "nnUNetPlans", configs: tuple | str = (M.N_2D, M.N_3D_FULLRES, M.N_3D_LOWRES, M.N_3D_CASCADE_FULLRES), - trainers: tuple | str = "nnUNetTrainer", + trainers: tuple | str | None = None, allow_ensembling: bool = True, num_processes: int = -1, overwrite: bool = True, @@ -726,6 +734,9 @@ def find_best_configuration( configs = ensure_tuple(configs) plans = ensure_tuple(plans) + + if trainers == None: + trainers = self.trainer_class_name trainers = ensure_tuple(trainers) models = dumb_trainer_config_plans_to_trained_models_dict(trainers, configs, plans) From 673bcc17ce646c4f569a33b8db387aaa04766329 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 6 May 2023 07:21:14 +0000 Subject: [PATCH 09/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- monai/apps/nnunet/nnunetv2_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index 92e118e007..9b943731f1 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -735,7 +735,7 @@ def find_best_configuration( configs = ensure_tuple(configs) plans = ensure_tuple(plans) - if trainers == None: + if trainers is None: trainers = self.trainer_class_name trainers = ensure_tuple(trainers) From bab5f904834827502844e54efbe7469cdd0504d2 Mon Sep 17 00:00:00 2001 From: dongy Date: Sat, 6 May 2023 08:00:42 -0700 Subject: [PATCH 10/14] update Signed-off-by: dongy --- monai/apps/nnunet/nnunetv2_runner.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index 92e118e007..9ebf6755d5 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -510,6 +510,10 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, ** - disable_checkpointing: True to disable checkpointing. Ideal for testing things out and you don't want to flood your hard drive with checkpoints. Default: False. """ + if "num_gpus" in kwargs: + kwargs.pop("num_gpus") + logger.warning("please use device_id to set the GPUs to use") + if isinstance(gpu_id, tuple): if len(gpu_id) > 1: gpu_ids_str = "" From 0528a769ae2b65e9851498a420b639454906930b Mon Sep 17 00:00:00 2001 From: dongy Date: Sat, 6 May 2023 08:07:42 -0700 Subject: [PATCH 11/14] update Signed-off-by: dongy --- monai/apps/nnunet/nnunetv2_runner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index 17bf075ced..0d91ab8d5d 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -484,7 +484,7 @@ def plan_and_process( if not no_pp: self.preprocess(c, n_proc, overwrite_plans_name, verbose) - def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, **kwargs: Any) -> None: + def train_single_model(self, config: Any, fold: int, gpu_id: tuple | list | int = 0, **kwargs: Any) -> None: """ Run the training on a single GPU with one specified configuration provided. Note: this will override the environment variable `CUDA_VISIBLE_DEVICES`. @@ -514,7 +514,7 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, ** kwargs.pop("num_gpus") logger.warning("please use device_id to set the GPUs to use") - if isinstance(gpu_id, tuple): + if isinstance(gpu_id, tuple) or isinstance(gpu_id, list): if len(gpu_id) > 1: gpu_ids_str = "" for _i in range(len(gpu_id)): @@ -550,7 +550,7 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, ** def train( self, configs: tuple | str = (M.N_3D_FULLRES, M.N_2D, M.N_3D_LOWRES, M.N_3D_CASCADE_FULLRES), - device_ids: tuple | None = None, + device_ids: tuple | list | None = None, **kwargs: Any, ) -> None: """ From 2edcd5cd2aeb80e9f69949e40e233d2eb570cca0 Mon Sep 17 00:00:00 2001 From: dongy Date: Sat, 6 May 2023 08:27:12 -0700 Subject: [PATCH 12/14] update Signed-off-by: dongy --- monai/apps/nnunet/nnunetv2_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index 0d91ab8d5d..41584e7093 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -492,7 +492,7 @@ def train_single_model(self, config: Any, fold: int, gpu_id: tuple | list | int Args: config: configuration that should be trained. Examples: "2d", "3d_fullres", "3d_lowres". fold: fold of the 5-fold cross-validation. Should be an int between 0 and 4. - gpu_id: an integer to select the device to use, or a tuple of GPU device indices used for multi-GPU + gpu_id: an integer to select the device to use, or a tuple/list of GPU device indices used for multi-GPU training (e.g., (0,1)). Default: 0. from nnunetv2.run.run_training import run_training kwargs: this optional parameter allows you to specify additional arguments in From 06a8e2e2649a85c54f31665e997befa0676c2419 Mon Sep 17 00:00:00 2001 From: dongy Date: Sat, 6 May 2023 08:28:45 -0700 Subject: [PATCH 13/14] update Signed-off-by: dongy --- monai/apps/nnunet/nnunetv2_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index 41584e7093..e7837f1dc8 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -512,7 +512,7 @@ def train_single_model(self, config: Any, fold: int, gpu_id: tuple | list | int """ if "num_gpus" in kwargs: kwargs.pop("num_gpus") - logger.warning("please use device_id to set the GPUs to use") + logger.warning("please use gpu_id to set the GPUs to use") if isinstance(gpu_id, tuple) or isinstance(gpu_id, list): if len(gpu_id) > 1: From 6d7b3cb42133bcb1d2535c743103a053cf2248b1 Mon Sep 17 00:00:00 2001 From: Mingxin Zheng <18563433+mingxin-zheng@users.noreply.github.com> Date: Sun, 7 May 2023 02:42:31 +0000 Subject: [PATCH 14/14] fix mypy Signed-off-by: Mingxin Zheng <18563433+mingxin-zheng@users.noreply.github.com> --- monai/apps/nnunet/nnunetv2_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py index e7837f1dc8..09260c04d4 100644 --- a/monai/apps/nnunet/nnunetv2_runner.py +++ b/monai/apps/nnunet/nnunetv2_runner.py @@ -641,7 +641,7 @@ def train_parallel_cmd( def train_parallel( self, configs: tuple | str = (M.N_3D_FULLRES, M.N_2D, M.N_3D_LOWRES, M.N_3D_CASCADE_FULLRES), - device_ids: tuple | None = None, + device_ids: tuple | list | None = None, **kwargs: Any, ) -> None: """