 from torch.testing._internal.common_utils import skip_but_pass_in_sandcastle_if
 import torch.nn.functional as F
 
+torch.set_default_dtype(torch.double)
+
 NO_NCCL = not hasattr(torch.distributed, "ProcessGroupNCCL")
 
 # batched grad doesn't support data parallel
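Note: the module-level torch.set_default_dtype(torch.double) added above is what allows every explicit dtype=torch.double argument in the hunks below to be dropped, since factory functions such as torch.randn create tensors in the default dtype unless one is passed explicitly. A minimal standalone sketch of that behavior (not part of the diff itself):

import torch

torch.set_default_dtype(torch.double)
t = torch.randn(3)                       # picks up the new default
assert t.dtype == torch.float64

f = torch.randn(3, dtype=torch.float32)  # an explicit dtype still wins
assert f.dtype == torch.float32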
@@ -38,11 +40,11 @@ def __init__(self, t):
             def forward(self, x):
                 return x * self.t_rg + self.t_not_rg
 
-        m = TestModule(torch.randn(100, device='cuda', requires_grad=True, dtype=torch.double))
+        m = TestModule(torch.randn(100, device='cuda', requires_grad=True))
         self.assertTrue(m.t_rg.requires_grad)
 
         dpm = nn.DataParallel(m, [0, 1])
-        inp = torch.randn(2, 100, device='cuda', dtype=torch.double)
+        inp = torch.randn(2, 100, device='cuda')
 
         def fn(t):
             return dpm(inp)
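For context, nn.DataParallel(m, [0, 1]) replicates the wrapped module onto GPUs 0 and 1 and splits each input batch between the replicas on every forward call. A minimal usage sketch (assumes two CUDA devices are available):

import torch
import torch.nn as nn

model = nn.Linear(100, 10).cuda()
dpm = nn.DataParallel(model, device_ids=[0, 1])

inp = torch.randn(2, 100, device='cuda')  # batch of 2: one sample per replica
out = dpm(inp)                            # output gathered back on device_ids[0]
assert out.size() == torch.Size([2, 10])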
@@ -510,11 +512,11 @@ def _test_scatter(self, tensor):
 
     @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "multi-GPU not supported")
     def test_scatter_cpu(self):
-        self._test_scatter(torch.randn((4, 4), dtype=torch.double))
+        self._test_scatter(torch.randn((4, 4)))
 
     @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "multi-GPU not supported")
     def test_scatter_gpu(self):
-        self._test_scatter(torch.randn((4, 4), dtype=torch.double).cuda())
+        self._test_scatter(torch.randn((4, 4)).cuda())
 
     @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "At least 2 CUDA GPUS needed")
     @skip_but_pass_in_sandcastle_if(NO_NCCL, "NCCL needed")
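The scatter helper these tests drive is presumably torch.nn.parallel.scatter (the dp alias used by the gather tests below), which splits a tensor along dim 0 across the listed devices. A minimal sketch of what _test_scatter exercises (assumes two CUDA devices):

import torch
import torch.nn.parallel as dp

full = torch.randn((4, 4))
chunks = dp.scatter(full, [0, 1])  # two (2, 4) chunks, one per GPU
assert [c.get_device() for c in chunks] == [0, 1]
assert all(c.size() == torch.Size([2, 4]) for c in chunks)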
@@ -537,8 +539,8 @@ def forward(self, x):
 
     def _test_gather(self, output_device):
         inputs = (
-            torch.randn(2, 4, device='cuda:0', requires_grad=True, dtype=torch.double),
-            torch.randn(2, 4, device='cuda:1', requires_grad=True, dtype=torch.double),
+            torch.randn(2, 4, device='cuda:0', requires_grad=True),
+            torch.randn(2, 4, device='cuda:1', requires_grad=True),
         )
         result = dp.gather(inputs, output_device)
         self.assertEqual(result.size(), torch.Size([4, 4]))
@@ -548,7 +550,7 @@ def _test_gather(self, output_device):
             self.assertEqual(result.get_device(), output_device)
         else:
             self.assertFalse(result.is_cuda)
-        grad = torch.randn((4, 4), dtype=torch.double)
+        grad = torch.randn((4, 4))
         if output_device != -1:
             grad = grad.cuda(output_device)
         result.backward(grad)
@@ -558,8 +560,8 @@ def _test_gather(self, output_device):
 
         # test scalar inputs, should stack into a vector in this case
         inputs = (
-            torch.randn((), device='cuda:0', requires_grad=True, dtype=torch.double),
-            torch.randn((), device='cuda:1', requires_grad=True, dtype=torch.double),
+            torch.randn((), device='cuda:0', requires_grad=True),
+            torch.randn((), device='cuda:1', requires_grad=True),
         )
         result = dp.gather(inputs, output_device)
         self.assertEqual(result.size(), torch.Size([2]))
@@ -569,7 +571,7 @@ def _test_gather(self, output_device):
             self.assertEqual(result.get_device(), output_device)
         else:
             self.assertFalse(result.is_cuda)
-        grad = torch.randn(2, dtype=torch.double)
+        grad = torch.randn(2)
         if output_device != -1:
             grad = grad.cuda(output_device)
         result.backward(grad)
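The scalar case above depends on torch.nn.parallel.gather stacking zero-dim inputs into a one-dim result instead of concatenating along dim 0, and on -1 denoting a CPU destination. A minimal sketch of that behavior (assumes two CUDA devices):

import torch
import torch.nn.parallel as dp

scalars = (
    torch.randn((), device='cuda:0'),
    torch.randn((), device='cuda:1'),
)
result = dp.gather(scalars, -1)  # zero-dim inputs are stacked into a vector on CPU
assert result.size() == torch.Size([2])
assert not result.is_cuda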