From 8e076811cb5e417dcef050a1045e1ad6b5070b57 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Mon, 5 Jul 2021 13:52:56 +0800 Subject: [PATCH 1/4] [DLMED] add horovod tests Signed-off-by: Nic Ma --- tests/test_evenly_divisible_all_gather_hvd.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 tests/test_evenly_divisible_all_gather_hvd.py diff --git a/tests/test_evenly_divisible_all_gather_hvd.py b/tests/test_evenly_divisible_all_gather_hvd.py new file mode 100644 index 0000000000..33c0e93a66 --- /dev/null +++ b/tests/test_evenly_divisible_all_gather_hvd.py @@ -0,0 +1,57 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import horovod.torch as hvd + +from monai.utils import evenly_divisible_all_gather + + +class HvdEvenlyDivisibleAllGather: + def test_data(self): + # initialize Horovod + hvd.init() + if torch.cuda.is_available(): + torch.cuda.set_device(hvd.local_rank()) + self._run() + + def _run(self): + if hvd.rank() == 0: + data1 = torch.tensor([[1, 2], [3, 4]]) + data2 = torch.tensor([[1.0, 2.0]]) + data3 = torch.tensor(7) + + if hvd.rank() == 1: + data1 = torch.tensor([[5, 6]]) + data2 = torch.tensor([[3.0, 4.0], [5.0, 6.0]]) + data3 = torch.tensor(8) + + result1 = evenly_divisible_all_gather(data=data1, concat=True) + torch.testing.assert_allclose(result1, torch.tensor([[1, 2], [3, 4], [5, 6]])) + result2 = evenly_divisible_all_gather(data=data2, concat=False) + for r, e in zip(result2, [torch.tensor([[1.0, 2.0]]), torch.tensor([[3.0, 4.0], [5.0, 6.0]])]): + torch.testing.assert_allclose(r, e) + result3 = evenly_divisible_all_gather(data=data3, concat=False) + for r in result3: + torch.testing.assert_allclose(r.ndimension(), 0) + + +if __name__ == "__main__": + """ + 1. Install Horovod: + `HOROVOD_NCCL_INCLUDE=/usr/include HOROVOD_NCCL_LIB=/usr/lib/x86_64-linux-gnu HOROVOD_GPU_OPERATIONS=NCCL \ + HOROVOD_NCCL_LINK=SHARED pip install --no-cache-dir horovod` + + 2. Execute on 2 GPUs in a single machine: + `horovodrun -np 2 python test_evenly_divisible_all_gather_hvd.py` + + """ + HvdEvenlyDivisibleAllGather().test_data() From f1d6c91d7b47c1bc2078b981a6d55665196fd62e Mon Sep 17 00:00:00 2001 From: monai-bot Date: Mon, 5 Jul 2021 06:22:57 +0000 Subject: [PATCH 2/4] [MONAI] python code formatting Signed-off-by: monai-bot --- tests/test_evenly_divisible_all_gather_hvd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_evenly_divisible_all_gather_hvd.py b/tests/test_evenly_divisible_all_gather_hvd.py index 33c0e93a66..42b2e9530d 100644 --- a/tests/test_evenly_divisible_all_gather_hvd.py +++ b/tests/test_evenly_divisible_all_gather_hvd.py @@ -9,8 +9,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import torch import horovod.torch as hvd +import torch from monai.utils import evenly_divisible_all_gather From 40df9f6f14897d6e7fa71ce09b74893292640048 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Mon, 5 Jul 2021 14:42:20 +0800 Subject: [PATCH 3/4] [DLMED] skip min tests Signed-off-by: Nic Ma --- tests/min_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/min_tests.py b/tests/min_tests.py index a3f140b856..571f934524 100644 --- a/tests/min_tests.py +++ b/tests/min_tests.py @@ -134,6 +134,7 @@ def run_testsuit(): "test_unetr", "test_unetr_block", "test_vit", + "test_evenly_divisible_all_gather_hvd", ] assert sorted(exclude_cases) == sorted(set(exclude_cases)), f"Duplicated items in {exclude_cases}" From 43f03208e47c27509387cc157ab3b5aa78a626b0 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Mon, 5 Jul 2021 22:41:51 +0800 Subject: [PATCH 4/4] [DLMED] rename to skip auto CI Signed-off-by: Nic Ma --- ...ible_all_gather_hvd.py => hvd_evenly_divisible_all_gather.py} | 0 tests/min_tests.py | 1 - 2 files changed, 1 deletion(-) rename tests/{test_evenly_divisible_all_gather_hvd.py => hvd_evenly_divisible_all_gather.py} (100%) diff --git a/tests/test_evenly_divisible_all_gather_hvd.py b/tests/hvd_evenly_divisible_all_gather.py similarity index 100% rename from tests/test_evenly_divisible_all_gather_hvd.py rename to tests/hvd_evenly_divisible_all_gather.py diff --git a/tests/min_tests.py b/tests/min_tests.py index 571f934524..a3f140b856 100644 --- a/tests/min_tests.py +++ b/tests/min_tests.py @@ -134,7 +134,6 @@ def run_testsuit(): "test_unetr", "test_unetr_block", "test_vit", - "test_evenly_divisible_all_gather_hvd", ] assert sorted(exclude_cases) == sorted(set(exclude_cases)), f"Duplicated items in {exclude_cases}"