diff --git a/test/torch_ops_test.py b/test/torch_ops_test.py
index 1ed5744d..f248427a 100644
--- a/test/torch_ops_test.py
+++ b/test/torch_ops_test.py
@@ -233,6 +233,67 @@ def test_allgather_variable_size(self):
             assert rank_tensor.data.max() == i, \
                 "bf.allgather(var) produces incorrect gathered tensor"
 
+    def test_neighbor_allreduce_sum_precision(self):
+        """Test that neighbor allreduce (sum) reduces 1D, 2D, 3D tensors without losing precision."""
+        size = bf.size()
+        rank = bf.rank()
+        if size <= 1:
+            fname = inspect.currentframe().f_code.co_name
+            warnings.warn("Skip {} due to size 1".format(fname))
+            return
+        dtypes = [torch.DoubleTensor]
+        if TEST_ON_GPU:
+            dtypes += [torch.cuda.DoubleTensor]
+
+        # By default, we use the power-of-two ring topology.
+        num_indegree = int(np.ceil(np.log2(size)))
+        neighbor_ranks = [(rank - 2**i) % size for i in range(num_indegree)]
+        # Every participant contributes 2**-256, so the exact sum is (indegree + 1) * 2**-256.
+        sum_value = (len(neighbor_ranks) + 1) * (2**-256)
+
+        dims = [1, 2, 3]
+        for dtype, dim in itertools.product(dtypes, dims):
+            tensor = torch.DoubleTensor(*([23] * dim)).fill_(1).mul_(2**-256)
+            tensor = self.cast_and_place(tensor, dtype)
+            name = "neighbor_allreduce_{}_{}".format(dim, dtype)
+            nw = {i: 1.0 for i in neighbor_ranks}
+            reduced_tensor = bf.neighbor_allreduce(tensor, self_weight=1.0,
+                                                   neighbor_weights=nw, name=name)
+            assert (
+                list(reduced_tensor.shape) == [23] * dim
+            ), "bf.neighbor_allreduce (sum) produces incorrect reduced shape"
+            assert (
+                (reduced_tensor.data - sum_value).abs().max() == 0
+            ), "bf.neighbor_allreduce (sum) produces incorrect reduced tensor"
+
+    def test_neighbor_allreduce_avg_precision(self):
+        """Test that neighbor allreduce (avg) reduces 1D, 2D, 3D tensors without losing precision."""
+        size = bf.size()
+        rank = bf.rank()
+        if size <= 1:
+            fname = inspect.currentframe().f_code.co_name
+            warnings.warn("Skip {} due to size 1".format(fname))
+            return
+        dtypes = [torch.DoubleTensor]
+        if TEST_ON_GPU:
+            dtypes += [torch.cuda.DoubleTensor]
+
+        # All ranks contribute 2**-256, so the default uniform average is exactly 2**-256.
+        avg_value = 2**-256
+
+        dims = [1, 2, 3]
+        for dtype, dim in itertools.product(dtypes, dims):
+            tensor = torch.DoubleTensor(*([23] * dim)).fill_(1).mul_(2**-256)
+            tensor = self.cast_and_place(tensor, dtype)
+            name = "neighbor_allreduce_{}_{}".format(dim, dtype)
+            reduced_tensor = bf.neighbor_allreduce(tensor, name=name)
+            assert (
+                list(reduced_tensor.shape) == [23] * dim
+            ), "bf.neighbor_allreduce (avg) produces incorrect reduced shape"
+            assert (
+                (reduced_tensor.data - avg_value).abs().max() == 0
+            ), "bf.neighbor_allreduce (avg) produces incorrect reduced tensor"
+
     def test_neighbor_allreduce_avg(self):
         """Test that the neighbor all reduce (avg) 1D, 2D, 3D tensors correctly."""
         size = bf.size()
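A note on the fill value used by both precision tests (a sketch of the rationale, not part of the patch): 2**-256 is exactly representable in float64 but lies far below the smallest positive float32, so it silently underflows to zero if any stage of the reduction downcasts to single precision; and because it is a power of two, summing a handful of copies in double precision is exact, which is why the tests can compare with strict equality instead of a tolerance. A minimal standalone illustration with plain NumPy (independent of Bluefog):

    import numpy as np

    x = np.float64(2.0) ** -256                # ~8.6e-78, exactly representable as a double
    assert x != 0.0
    assert np.float32(x) == np.float32(0.0)    # underflows to zero in single precision
    # Summing a few copies of a power of two is exact in float64, so a strict
    # equality check like (reduced - expected).abs().max() == 0 is legitimate.
    assert x + x + x + x + x == np.float64(5) * x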