Skip to content

Commit

Permalink
Add Double precision test for neighbor_allreduce
Browse files Browse the repository at this point in the history
  • Loading branch information
Hanbin Hu committed Jun 12, 2020
1 parent ff22ae2 commit 0b92bb2
Showing 1 changed file with 64 additions and 0 deletions.
64 changes: 64 additions & 0 deletions test/torch_ops_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,70 @@ def test_allgather_variable_size(self):
assert rank_tensor.data.max() == i, \
"bf.allgather(var) produces incorrect gathered tensor"

def test_neighbor_allreduce_sum_precision(self):
    """Test that neighbor_allreduce (sum) is bit-exact for 1D, 2D, and 3D double tensors.

    Every rank contributes the value 2**-256 — tiny, but exactly
    representable as a double — so the weighted sum must equal
    (indegree + 1) * 2**-256 with zero error; any precision loss in the
    reduction shows up as a nonzero difference.
    """
    size = bf.size()
    rank = bf.rank()
    if size <= 1:
        # neighbor_allreduce needs at least two ranks to be meaningful.
        fname = inspect.currentframe().f_code.co_name
        warnings.warn("Skip {} due to size 1".format(fname))
        return
    dtypes = [torch.DoubleTensor]
    if TEST_ON_GPU:
        dtypes += [torch.cuda.DoubleTensor]

    # By default, the power-two ring topology is used: each rank receives
    # from ranks (rank - 2**i) % size for i in [0, ceil(log2(size))).
    num_indegree = int(np.ceil(np.log2(size)))
    neighbor_ranks = [(rank - 2**i) % size for i in range(num_indegree)]
    # All weights are 1.0, so the exact expected sum is
    # (number of neighbors + self) * 2**-256.
    sum_value = (len(neighbor_ranks) + 1) * (2**-256)

    dims = [1, 2, 3]
    for dtype, dim in itertools.product(dtypes, dims):
        tensor = torch.DoubleTensor(*([23] * dim)).fill_(1).mul_(2**-256)
        tensor = self.cast_and_place(tensor, dtype)
        name = "neighbor_allreduce_{}_{}".format(dim, dtype)
        nw = {i: 1.0 for i in neighbor_ranks}
        reduced_tensor = bf.neighbor_allreduce(tensor, self_weight=1.0,
                                               neighbor_weights=nw, name=name)
        assert (
            list(reduced_tensor.shape) == [23] * dim
        ), "bf.neighbor_allreduce (sum) produces incorrect reduced shape"
        # Require exact equality — this is a precision test, not a tolerance test.
        assert (
            (reduced_tensor.data - sum_value).abs().max() == 0
        ), "bf.neighbor_allreduce (sum) produces incorrect reduced tensor"

def test_neighbor_allreduce_avg_precision(self):
    """Test that neighbor_allreduce (avg) is bit-exact for 1D, 2D, and 3D double tensors.

    Every rank contributes the value 2**-256 — tiny, but exactly
    representable as a double — so the average of identical values must
    equal 2**-256 with zero error; any precision loss in the reduction
    shows up as a nonzero difference.
    """
    size = bf.size()
    rank = bf.rank()
    if size <= 1:
        # neighbor_allreduce needs at least two ranks to be meaningful.
        fname = inspect.currentframe().f_code.co_name
        warnings.warn("Skip {} due to size 1".format(fname))
        return
    dtypes = [torch.DoubleTensor]
    if TEST_ON_GPU:
        dtypes += [torch.cuda.DoubleTensor]

    # Every rank sends the same value, so the (default, uniform-weight)
    # average must reproduce it exactly.
    sum_value = 2**-256

    dims = [1, 2, 3]
    for dtype, dim in itertools.product(dtypes, dims):
        tensor = torch.DoubleTensor(*([23] * dim)).fill_(1).mul_(2**-256)
        tensor = self.cast_and_place(tensor, dtype)
        name = "neighbor_allreduce_{}_{}".format(dim, dtype)
        reduced_tensor = bf.neighbor_allreduce(tensor, name=name)
        assert (
            list(reduced_tensor.shape) == [23] * dim
        ), "bf.neighbor_allreduce (avg) produces incorrect reduced shape"
        # Require exact equality — this is a precision test, not a tolerance test.
        assert (
            (reduced_tensor.data - sum_value).abs().max() == 0
        ), "bf.neighbor_allreduce (avg) produces incorrect reduced tensor"

def test_neighbor_allreduce_avg(self):
"""Test that the neighbor all reduce (avg) 1D, 2D, 3D tensors correctly."""
size = bf.size()
Expand Down

0 comments on commit 0b92bb2

Please sign in to comment.