From b5dccbe020fb37d3f3dd146767f0ddfc20f68061 Mon Sep 17 00:00:00 2001
From: dongy <dongy@nvidia.com>
Date: Thu, 4 May 2023 23:35:40 -0700
Subject: [PATCH 01/14] Add trainer_class_name option and multi-GPU gpu_id support to nnUNetV2Runner

Signed-off-by: dongy <dongy@nvidia.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 46 +++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index e0b57ffe1c..664ac949de 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -141,9 +141,12 @@ class nnUNetV2Runner:  # noqa: N801
 
     """
 
-    def __init__(self, input_config: Any, work_dir: str = "work_dir") -> None:
+    def __init__(
+        self, input_config: Any, trainer_class_name: str = "nnUNetTrainer", work_dir: str = "work_dir"
+    ) -> None:
         self.input_info: dict = {}
         self.input_config_or_dict = input_config
+        self.trainer_class_name = trainer_class_name
         self.work_dir = work_dir
 
         if isinstance(self.input_config_or_dict, dict):
@@ -470,7 +473,7 @@ def plan_and_process(
         if not no_pp:
             self.preprocess(c, n_proc, overwrite_plans_name, verbose)
 
-    def train_single_model(self, config: Any, fold: int, gpu_id: int = 0, **kwargs: Any) -> None:
+    def train_single_model(self, config: Any, fold: int, gpu_id: int | str | tuple = 0, **kwargs: Any) -> None:
         """
         Run the training on a single GPU with one specified configuration provided.
         Note: this will override the environment variable `CUDA_VISIBLE_DEVICES`.
@@ -478,7 +481,8 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int = 0, **kwargs:
         Args:
             config: configuration that should be trained. Examples: "2d", "3d_fullres", "3d_lowres".
             fold: fold of the 5-fold cross-validation. Should be an int between 0 and 4.
-            gpu_id: an integer to select the device to use. Default: 0.
+            gpu_id: an integer to select the device to use, or a str/tuple of device indices used for multi-GPU
+                training (e.g., "0,1"). Default: 0.
         from nnunetv2.run.run_training import run_training
             kwargs: this optional parameter allows you to specify additional arguments in
                 ``nnunetv2.run.run_training.run_training``. Currently supported args are
@@ -498,11 +502,32 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int = 0, **kwargs:
                     - disable_checkpointing: True to disable checkpointing. Ideal for testing things out and you
                         don't want to flood your hard drive with checkpoints. Default: False.
         """
-        os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id}"
+        if isinstance(gpu_id, str):
+            gpu_id = tuple(map(int, gpu_id.replace('"', "").split(",")))
+
+        if isinstance(gpu_id, tuple):
+            if len(gpu_id) > 1:
+                gpu_ids_str = ""
+                for _i in range(len(gpu_id)):
+                    gpu_ids_str += f"{gpu_id[_i]},"
+                os.environ["CUDA_VISIBLE_DEVICES"] = gpu_ids_str[:-1]
+            else:
+                os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id[0]}"
+        else:
+            os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id}"
 
         from nnunetv2.run.run_training import run_training
 
-        run_training(dataset_name_or_id=self.dataset_name_or_id, configuration=config, fold=fold, **kwargs)
+        if isinstance(gpu_id, int):
+            run_training(dataset_name_or_id=self.dataset_name_or_id, configuration=config, fold=fold, **kwargs)
+        else:
+            run_training(
+                dataset_name_or_id=self.dataset_name_or_id,
+                configuration=config,
+                fold=fold,
+                num_gpus=len(gpu_id),
+                **kwargs,
+            )
 
     def train(
         self,
@@ -530,11 +555,18 @@ def train(
             device_ids = tuple(range(num_gpus))
         logger.info(f"number of GPUs is {len(device_ids)}, device ids are {device_ids}")
         if len(device_ids) > 1:
-            self.train_parallel(configs=ensure_tuple(configs), device_ids=device_ids, **kwargs)
+            self.train_parallel(
+                configs=ensure_tuple(configs),
+                device_ids=device_ids,
+                trainer_class_name=self.trainer_class_name,
+                **kwargs,
+            )
         else:
             for cfg in ensure_tuple(configs):
                 for _fold in range(self.num_folds):
-                    self.train_single_model(config=cfg, fold=_fold, **kwargs)
+                    self.train_single_model(
+                        config=cfg, fold=_fold, trainer_class_name=self.trainer_class_name, **kwargs
+                    )
 
     def train_parallel_cmd(
         self,

From b60148cb358708906db05d424bbf18bbd6221c3d Mon Sep 17 00:00:00 2001
From: dongy <dongy@nvidia.com>
Date: Fri, 5 May 2023 08:10:24 -0700
Subject: [PATCH 02/14] Drop str from gpu_id type hint and log parallel training commands

Signed-off-by: dongy <dongy@nvidia.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index 664ac949de..4d22ffd38c 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -473,7 +473,7 @@ def plan_and_process(
         if not no_pp:
             self.preprocess(c, n_proc, overwrite_plans_name, verbose)
 
-    def train_single_model(self, config: Any, fold: int, gpu_id: int | str | tuple = 0, **kwargs: Any) -> None:
+    def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, **kwargs: Any) -> None:
         """
         Run the training on a single GPU with one specified configuration provided.
         Note: this will override the environment variable `CUDA_VISIBLE_DEVICES`.
@@ -481,8 +481,8 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | str | tuple =
         Args:
             config: configuration that should be trained. Examples: "2d", "3d_fullres", "3d_lowres".
             fold: fold of the 5-fold cross-validation. Should be an int between 0 and 4.
-            gpu_id: an integer to select the device to use, or a str/tuple of device indices used for multi-GPU
-                training (e.g., "0,1"). Default: 0.
+            gpu_id: an integer to select the device to use, or a tuple of GPU device indices used for multi-GPU
+                training (e.g., (0,1)). Default: 0.
         from nnunetv2.run.run_training import run_training
             kwargs: this optional parameter allows you to specify additional arguments in
                 ``nnunetv2.run.run_training.run_training``. Currently supported args are
@@ -660,6 +660,7 @@ def train_parallel(
                 if not stage[device_id]:
                     continue
                 cmd_str = "; ".join(stage[device_id])
+                logger.info(f"\ncurrent command:\n{cmd_str}")
                 processes.append(subprocess.Popen(cmd_str, shell=True, stdout=subprocess.DEVNULL))
             # finish this stage first
             for p in processes:

From f57be402f9bb4e91dba162c4048e8e653c558186 Mon Sep 17 00:00:00 2001
From: dongy <dongy@nvidia.com>
Date: Fri, 5 May 2023 11:31:11 -0700
Subject: [PATCH 03/14] Document trainer_class_name and pass it to run_training and the parallel command

Signed-off-by: dongy <dongy@nvidia.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 32 ++++++++++++++--------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index 4d22ffd38c..ef060e64b4 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -64,6 +64,9 @@ class nnUNetV2Runner:  # noqa: N801
             - ``"nnUNet_trained_models"``
             - ``"dataset_name_or_id"``: Name or Integer ID of the dataset
             If an optional key is not specified, then the pipeline will use the default values.
+        trainer_class_name: the trainer class names offered by nnUNetV2 exhibit variations in training duration.
+            Default: "nnUNetTrainer". Other options: "nnUNetTrainer_Xepoch". X could be one of 1,5,10,20,50,100,
+            250,2000,4000,8000.
         work_dir: working directory to save the intermediate and final results.
 
     Examples:
@@ -502,9 +505,6 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, **
                     - disable_checkpointing: True to disable checkpointing. Ideal for testing things out and you
                         don't want to flood your hard drive with checkpoints. Default: False.
         """
-        if isinstance(gpu_id, str):
-            gpu_id = tuple(map(int, gpu_id.replace('"', "").split(",")))
-
         if isinstance(gpu_id, tuple):
             if len(gpu_id) > 1:
                 gpu_ids_str = ""
@@ -519,13 +519,20 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, **
         from nnunetv2.run.run_training import run_training
 
         if isinstance(gpu_id, int):
-            run_training(dataset_name_or_id=self.dataset_name_or_id, configuration=config, fold=fold, **kwargs)
+            run_training(
+                dataset_name_or_id=self.dataset_name_or_id,
+                configuration=config,
+                fold=fold,
+                trainer_class_name=self.trainer_class_name,
+                **kwargs,
+            )
         else:
             run_training(
                 dataset_name_or_id=self.dataset_name_or_id,
                 configuration=config,
                 fold=fold,
                 num_gpus=len(gpu_id),
+                trainer_class_name=self.trainer_class_name,
                 **kwargs,
             )
 
@@ -547,7 +554,6 @@ def train(
             kwargs: this optional parameter allows you to specify additional arguments defined in the
                 ``train_single_model`` method.
         """
-
         if device_ids is None:
             result = subprocess.run(["nvidia-smi", "--list-gpus"], stdout=subprocess.PIPE)
             output = result.stdout.decode("utf-8")
@@ -555,18 +561,11 @@ def train(
             device_ids = tuple(range(num_gpus))
         logger.info(f"number of GPUs is {len(device_ids)}, device ids are {device_ids}")
         if len(device_ids) > 1:
-            self.train_parallel(
-                configs=ensure_tuple(configs),
-                device_ids=device_ids,
-                trainer_class_name=self.trainer_class_name,
-                **kwargs,
-            )
+            self.train_parallel(configs=ensure_tuple(configs), device_ids=device_ids, **kwargs)
         else:
             for cfg in ensure_tuple(configs):
                 for _fold in range(self.num_folds):
-                    self.train_single_model(
-                        config=cfg, fold=_fold, trainer_class_name=self.trainer_class_name, **kwargs
-                    )
+                    self.train_single_model(config=cfg, fold=_fold, **kwargs)
 
     def train_parallel_cmd(
         self,
@@ -618,7 +617,8 @@ def train_parallel_cmd(
                         cmd = (
                             "python -m monai.apps.nnunet nnUNetV2Runner train_single_model "
                             + f"--input_config '{self.input_config_or_dict}' --work_dir '{self.work_dir}' "
-                            + f"--config '{_config}' --fold {_i} --gpu_id {the_device}"
+                            + f"--config '{_config}' --fold {_i} --gpu_id {the_device} "
+                            + f"--trainer_class_name {self.trainer_class_name}"
                         )
                         for _key, _value in kwargs.items():
                             cmd += f" --{_key} {_value}"
@@ -660,7 +660,7 @@ def train_parallel(
                 if not stage[device_id]:
                     continue
                 cmd_str = "; ".join(stage[device_id])
-                logger.info(f"\ncurrent command:\n{cmd_str}")
+                logger.info(f"Current running command on GPU device {device_id}:\n{cmd_str}\n")
                 processes.append(subprocess.Popen(cmd_str, shell=True, stdout=subprocess.DEVNULL))
             # finish this stage first
             for p in processes:

From f62405f1d0869731b1ff121bbeae67675f322e93 Mon Sep 17 00:00:00 2001
From: dongy <dongy@nvidia.com>
Date: Fri, 5 May 2023 11:41:03 -0700
Subject: [PATCH 04/14] Remove leading newline from training stage log message

Signed-off-by: dongy <dongy@nvidia.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index ef060e64b4..f70430a42d 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -650,7 +650,7 @@ def train_parallel(
                 if not gpu_cmd:
                     continue
                 logger.info(
-                    f"\ntraining - stage {s + 1}:\n"
+                    f"training - stage {s + 1}:\n"
                     f"for gpu {gpu_id}, commands: {gpu_cmd}\n"
                     f"log '.txt' inside '{os.path.join(self.nnunet_results, self.dataset_name)}'"
                 )

From e0df797ec5dd4c3bbba3fef1dc554ed78d2d0c3f Mon Sep 17 00:00:00 2001
From: dongy <dongy@nvidia.com>
Date: Fri, 5 May 2023 23:31:46 -0700
Subject: [PATCH 05/14] Treat single-element gpu_id as single-GPU training and pass device_ids from train

Signed-off-by: dongy <dongy@nvidia.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index f70430a42d..d20d306b9a 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -518,7 +518,7 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, **
 
         from nnunetv2.run.run_training import run_training
 
-        if isinstance(gpu_id, int):
+        if isinstance(gpu_id, int) or len(gpu_id) == 1:
             run_training(
                 dataset_name_or_id=self.dataset_name_or_id,
                 configuration=config,
@@ -565,7 +565,7 @@ def train(
         else:
             for cfg in ensure_tuple(configs):
                 for _fold in range(self.num_folds):
-                    self.train_single_model(config=cfg, fold=_fold, **kwargs)
+                    self.train_single_model(config=cfg, fold=_fold, gpu_id=device_ids, **kwargs)
 
     def train_parallel_cmd(
         self,

From 218a8f61f76079043de6e7381144461914df9fd9 Mon Sep 17 00:00:00 2001
From: dongy <dongy@nvidia.com>
Date: Fri, 5 May 2023 23:36:49 -0700
Subject: [PATCH 06/14] Allow list device_ids in train_parallel_cmd

Signed-off-by: dongy <dongy@nvidia.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index d20d306b9a..a6293114d0 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -570,7 +570,7 @@ def train(
     def train_parallel_cmd(
         self,
         configs: tuple | str = (M.N_3D_FULLRES, M.N_2D, M.N_3D_LOWRES, M.N_3D_CASCADE_FULLRES),
-        device_ids: tuple | None = None,
+        device_ids: tuple | list | None = None,
         **kwargs: Any,
     ) -> list:
         """
@@ -579,7 +579,7 @@ def train_parallel_cmd(
         Args:
             configs: configurations that should be trained.
                 Default: ("2d", "3d_fullres", "3d_lowres", "3d_cascade_fullres").
-            device_ids: a tuple of GPU device IDs to use for the training. Default: None (all available GPUs).
+            device_ids: a tuple/list of GPU device IDs to use for the training. Default: None (all available GPUs).
             kwargs: this optional parameter allows you to specify additional arguments defined in the
                 ``train_single_model`` method.
         """

From badcb56db81ce94695b6b31a52324afd29871650 Mon Sep 17 00:00:00 2001
From: dongy <dongy@nvidia.com>
Date: Fri, 5 May 2023 23:43:57 -0700
Subject: [PATCH 07/14] Lowercase argument descriptions in find_best_configuration docstring

Signed-off-by: dongy <dongy@nvidia.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index a6293114d0..bdea96c194 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -712,9 +712,9 @@ def find_best_configuration(
             plans: list of plan identifiers. Default: nnUNetPlans.
             configs: list of configurations. Default: ["2d", "3d_fullres", "3d_lowres", "3d_cascade_fullres"].
             trainers: list of trainers. Default: nnUNetTrainer.
-            allow_ensembling: Set this flag to enable ensembling.
+            allow_ensembling: set this flag to enable ensembling.
             num_processes: number of processes to use for ensembling, postprocessing, etc.
-            overwrite: If set we will overwrite already ensembled files etc. May speed up consecutive
+            overwrite: if set we will overwrite already ensembled files etc. May speed up consecutive
                 runs of this command (not recommended) at the risk of not updating outdated results.
             folds: folds to use. Default: (0, 1, 2, 3, 4).
             strict: a switch that triggers RunTimeError if the logging folder cannot be found. Default: False.

From facb12f5ada769bc83053bef62f0d5a40b7d19ec Mon Sep 17 00:00:00 2001
From: dongy <dongy@nvidia.com>
Date: Sat, 6 May 2023 00:20:46 -0700
Subject: [PATCH 08/14] Add export_validation_probabilities option and default trainers to trainer_class_name

Signed-off-by: dongy <dongy@nvidia.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index bdea96c194..92e118e007 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -67,6 +67,9 @@ class nnUNetV2Runner:  # noqa: N801
         trainer_class_name: the trainer class names offered by nnUNetV2 exhibit variations in training duration.
             Default: "nnUNetTrainer". Other options: "nnUNetTrainer_Xepoch". X could be one of 1,5,10,20,50,100,
             250,2000,4000,8000.
+        export_validation_probabilities: True to save softmax predictions from final validation as npz
+            files (in addition to predicted segmentations). Needed for finding the best ensemble.
+            Default: True.
         work_dir: working directory to save the intermediate and final results.
 
     Examples:
@@ -145,11 +148,16 @@ class nnUNetV2Runner:  # noqa: N801
     """
 
     def __init__(
-        self, input_config: Any, trainer_class_name: str = "nnUNetTrainer", work_dir: str = "work_dir"
+        self,
+        input_config: Any,
+        trainer_class_name: str = "nnUNetTrainer",
+        work_dir: str = "work_dir",
+        export_validation_probabilities: bool = True,
     ) -> None:
         self.input_info: dict = {}
         self.input_config_or_dict = input_config
         self.trainer_class_name = trainer_class_name
+        self.export_validation_probabilities = export_validation_probabilities
         self.work_dir = work_dir
 
         if isinstance(self.input_config_or_dict, dict):
@@ -496,9 +504,6 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, **
                     - use_compressed_data: True to use compressed data for training. Reading compressed data is much
                         more CPU and (potentially) RAM intensive and should only be used if you know what you are
                         doing. Default: False.
-                    - export_validation_probabilities: True to save softmax predictions from final validation as npz
-                        files (in addition to predicted segmentations). Needed for finding the best ensemble.
-                        Default: False.
                     - continue_training: continue training from latest checkpoint. Default: False.
                     - only_run_validation: True to run the validation only. Requires training to have finished.
                         Default: False.
@@ -524,6 +529,7 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, **
                 configuration=config,
                 fold=fold,
                 trainer_class_name=self.trainer_class_name,
+                export_validation_probabilities=self.export_validation_probabilities,
                 **kwargs,
             )
         else:
@@ -533,6 +539,7 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, **
                 fold=fold,
                 num_gpus=len(gpu_id),
                 trainer_class_name=self.trainer_class_name,
+                export_validation_probabilities=self.export_validation_probabilities,
                 **kwargs,
             )
 
@@ -618,7 +625,8 @@ def train_parallel_cmd(
                             "python -m monai.apps.nnunet nnUNetV2Runner train_single_model "
                             + f"--input_config '{self.input_config_or_dict}' --work_dir '{self.work_dir}' "
                             + f"--config '{_config}' --fold {_i} --gpu_id {the_device} "
-                            + f"--trainer_class_name {self.trainer_class_name}"
+                            + f"--trainer_class_name {self.trainer_class_name} "
+                            + f"--export_validation_probabilities {self.export_validation_probabilities}"
                         )
                         for _key, _value in kwargs.items():
                             cmd += f" --{_key} {_value}"
@@ -698,7 +706,7 @@ def find_best_configuration(
         self,
         plans: tuple | str = "nnUNetPlans",
         configs: tuple | str = (M.N_2D, M.N_3D_FULLRES, M.N_3D_LOWRES, M.N_3D_CASCADE_FULLRES),
-        trainers: tuple | str = "nnUNetTrainer",
+        trainers: tuple | str | None = None,
         allow_ensembling: bool = True,
         num_processes: int = -1,
         overwrite: bool = True,
@@ -726,6 +734,9 @@ def find_best_configuration(
 
         configs = ensure_tuple(configs)
         plans = ensure_tuple(plans)
+
+        if trainers == None:
+            trainers = self.trainer_class_name
         trainers = ensure_tuple(trainers)
 
         models = dumb_trainer_config_plans_to_trained_models_dict(trainers, configs, plans)

From 673bcc17ce646c4f569a33b8db387aaa04766329 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 6 May 2023 07:21:14 +0000
Subject: [PATCH 09/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 monai/apps/nnunet/nnunetv2_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index 92e118e007..9b943731f1 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -735,7 +735,7 @@ def find_best_configuration(
         configs = ensure_tuple(configs)
         plans = ensure_tuple(plans)
 
-        if trainers == None:
+        if trainers is None:
             trainers = self.trainer_class_name
         trainers = ensure_tuple(trainers)
 

From bab5f904834827502844e54efbe7469cdd0504d2 Mon Sep 17 00:00:00 2001
From: dongy <dongy@nvidia.com>
Date: Sat, 6 May 2023 08:00:42 -0700
Subject: [PATCH 10/14] Drop num_gpus kwarg in train_single_model and log a warning

Signed-off-by: dongy <dongy@nvidia.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index 92e118e007..9ebf6755d5 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -510,6 +510,10 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, **
                     - disable_checkpointing: True to disable checkpointing. Ideal for testing things out and you
                         don't want to flood your hard drive with checkpoints. Default: False.
         """
+        if "num_gpus" in kwargs:
+            kwargs.pop("num_gpus")
+            logger.warning("please use device_id to set the GPUs to use")
+
         if isinstance(gpu_id, tuple):
             if len(gpu_id) > 1:
                 gpu_ids_str = ""

From 0528a769ae2b65e9851498a420b639454906930b Mon Sep 17 00:00:00 2001
From: dongy <dongy@nvidia.com>
Date: Sat, 6 May 2023 08:07:42 -0700
Subject: [PATCH 11/14] Accept list for gpu_id in train_single_model and device_ids in train

Signed-off-by: dongy <dongy@nvidia.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index 17bf075ced..0d91ab8d5d 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -484,7 +484,7 @@ def plan_and_process(
         if not no_pp:
             self.preprocess(c, n_proc, overwrite_plans_name, verbose)
 
-    def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, **kwargs: Any) -> None:
+    def train_single_model(self, config: Any, fold: int, gpu_id: tuple | list | int = 0, **kwargs: Any) -> None:
         """
         Run the training on a single GPU with one specified configuration provided.
         Note: this will override the environment variable `CUDA_VISIBLE_DEVICES`.
@@ -514,7 +514,7 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, **
             kwargs.pop("num_gpus")
             logger.warning("please use device_id to set the GPUs to use")
 
-        if isinstance(gpu_id, tuple):
+        if isinstance(gpu_id, tuple) or isinstance(gpu_id, list):
             if len(gpu_id) > 1:
                 gpu_ids_str = ""
                 for _i in range(len(gpu_id)):
@@ -550,7 +550,7 @@ def train_single_model(self, config: Any, fold: int, gpu_id: int | tuple = 0, **
     def train(
         self,
         configs: tuple | str = (M.N_3D_FULLRES, M.N_2D, M.N_3D_LOWRES, M.N_3D_CASCADE_FULLRES),
-        device_ids: tuple | None = None,
+        device_ids: tuple | list | None = None,
         **kwargs: Any,
     ) -> None:
         """

From 2edcd5cd2aeb80e9f69949e40e233d2eb570cca0 Mon Sep 17 00:00:00 2001
From: dongy <dongy@nvidia.com>
Date: Sat, 6 May 2023 08:27:12 -0700
Subject: [PATCH 12/14] Mention tuple/list gpu_id in train_single_model docstring

Signed-off-by: dongy <dongy@nvidia.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index 0d91ab8d5d..41584e7093 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -492,7 +492,7 @@ def train_single_model(self, config: Any, fold: int, gpu_id: tuple | list | int
         Args:
             config: configuration that should be trained. Examples: "2d", "3d_fullres", "3d_lowres".
             fold: fold of the 5-fold cross-validation. Should be an int between 0 and 4.
-            gpu_id: an integer to select the device to use, or a tuple of GPU device indices used for multi-GPU
+            gpu_id: an integer to select the device to use, or a tuple/list of GPU device indices used for multi-GPU
                 training (e.g., (0,1)). Default: 0.
         from nnunetv2.run.run_training import run_training
             kwargs: this optional parameter allows you to specify additional arguments in

From 06a8e2e2649a85c54f31665e997befa0676c2419 Mon Sep 17 00:00:00 2001
From: dongy <dongy@nvidia.com>
Date: Sat, 6 May 2023 08:28:45 -0700
Subject: [PATCH 13/14] Refer to gpu_id instead of device_id in num_gpus warning

Signed-off-by: dongy <dongy@nvidia.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index 41584e7093..e7837f1dc8 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -512,7 +512,7 @@ def train_single_model(self, config: Any, fold: int, gpu_id: tuple | list | int
         """
         if "num_gpus" in kwargs:
             kwargs.pop("num_gpus")
-            logger.warning("please use device_id to set the GPUs to use")
+            logger.warning("please use gpu_id to set the GPUs to use")
 
         if isinstance(gpu_id, tuple) or isinstance(gpu_id, list):
             if len(gpu_id) > 1:

From 6d7b3cb42133bcb1d2535c743103a053cf2248b1 Mon Sep 17 00:00:00 2001
From: Mingxin Zheng <18563433+mingxin-zheng@users.noreply.github.com>
Date: Sun, 7 May 2023 02:42:31 +0000
Subject: [PATCH 14/14] fix mypy

Signed-off-by: Mingxin Zheng <18563433+mingxin-zheng@users.noreply.github.com>
---
 monai/apps/nnunet/nnunetv2_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/apps/nnunet/nnunetv2_runner.py b/monai/apps/nnunet/nnunetv2_runner.py
index e7837f1dc8..09260c04d4 100644
--- a/monai/apps/nnunet/nnunetv2_runner.py
+++ b/monai/apps/nnunet/nnunetv2_runner.py
@@ -641,7 +641,7 @@ def train_parallel_cmd(
     def train_parallel(
         self,
         configs: tuple | str = (M.N_3D_FULLRES, M.N_2D, M.N_3D_LOWRES, M.N_3D_CASCADE_FULLRES),
-        device_ids: tuple | None = None,
+        device_ids: tuple | list | None = None,
         **kwargs: Any,
     ) -> None:
         """