diff --git a/python/oneflow/framework/unittest.py b/python/oneflow/framework/unittest.py
index 2d873b8e86e..b31a93641a2 100644
--- a/python/oneflow/framework/unittest.py
+++ b/python/oneflow/framework/unittest.py
@@ -104,7 +104,7 @@ def has_node_list():
 def node_size():
     node_num_from_env = os.getenv("ONEFLOW_TEST_NODE_NUM", None)
     if node_num_from_env:
-        return node_num_from_env
+        return int(node_num_from_env)
     elif has_node_list():
         node_list_from_env = node_list()
         return len(node_list_from_env)
diff --git a/python/oneflow/test/modules/test_allreduce.py b/python/oneflow/test/modules/test_allreduce.py
index b1baa752aa8..f50fe2b56a6 100644
--- a/python/oneflow/test/modules/test_allreduce.py
+++ b/python/oneflow/test/modules/test_allreduce.py
@@ -34,30 +34,16 @@ def test_all_reduce(test_case):
             x = flow.Tensor(arr_rank2)
         else:
             raise ValueError
-        x = x.to(f"cuda:{flow.distributed.get_local_rank()}")
-        nccl_allreduce_op = (
-            flow.builtin_op("eager_nccl_all_reduce")
-            .Input("in")
-            .Output("out")
-            .Attr("parallel_conf", f'device_tag: "gpu", device_name: "0:0-1"')
-            .Build()
-        )
-        y = nccl_allreduce_op(x)[0]
+        x = x.to("cuda")
+        y = flow.F.all_reduce(x)
         test_case.assertTrue(np.allclose(y.numpy(), arr_rank1 + arr_rank2))
 
     @flow.unittest.skip_unless_2n2d()
     def test_all_reduce_2nodes(test_case):
         np_arr = np.array([1, 2])
-        x = flow.Tensor(np_arr * flow.distributed.get_rank())
-        x = x.to(f"cuda:{flow.distributed.get_local_rank()}")
-        nccl_allreduce_op = (
-            flow.builtin_op("eager_nccl_all_reduce")
-            .Input("in")
-            .Output("out")
-            .Attr("parallel_conf", f'device_tag: "gpu", device_name: "0-1:0-1"')
-            .Build()
-        )
-        y = nccl_allreduce_op(x)[0]
+        x = flow.Tensor(np_arr * (flow.distributed.get_rank() + 1))
+        x = x.to("cuda")
+        y = flow.F.all_reduce(x)
         test_case.assertTrue(np.allclose(y.numpy(), np_arr * 10))
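
Note on the rewritten assertions (not part of the patch): flow.F.all_reduce is assumed here to sum the tensor across all ranks, which is what both assertions imply, and the node_size() change matters because the string returned by os.getenv would never compare equal to an integer node count. Below is a minimal NumPy-only sketch of the expected values; arr_rank1 and arr_rank2 use placeholder values (the real arrays are defined earlier in test_allreduce.py).

import numpy as np

# 1n2d case: rank 0 holds arr_rank1, rank 1 holds arr_rank2; the all-reduce sums them.
arr_rank1 = np.array([1.0, 2.0])   # placeholder values, for illustration only
arr_rank2 = np.array([3.0, 4.0])   # placeholder values, for illustration only
expected_1n2d = arr_rank1 + arr_rank2

# 2n2d case: each of the 4 ranks contributes np_arr * (rank + 1), so the reduced
# result is np_arr * (1 + 2 + 3 + 4) == np_arr * 10. Without the "+ 1", ranks 0..3
# would only sum to np_arr * 6 and the assertion against np_arr * 10 could not pass.
np_arr = np.array([1, 2])
expected_2n2d = sum(np_arr * (rank + 1) for rank in range(4))
assert np.array_equal(expected_2n2d, np_arr * 10)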