Skip to content

Commit

Permalink
Add support for examples under one GPU without NCCL
Browse files Browse the repository at this point in the history
  • Loading branch information
Hanbin Hu committed Nov 6, 2020
1 parent 115b909 commit 473ec22
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 9 deletions.
3 changes: 2 additions & 1 deletion examples/pytorch_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@
bf.init()

if args.cuda:
- torch.cuda.set_device(bf.local_rank())
+ device_id = bf.local_rank() if bf.nccl_built() else bf.local_rank() % torch.cuda.device_count()
+ torch.cuda.set_device(device_id)
cudnn.benchmark = True

# Set up standard model.
Expand Down
3 changes: 2 additions & 1 deletion examples/pytorch_mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@

if args.cuda:
# Bluefog: pin GPU to local rank.
- torch.cuda.set_device(bf.local_rank())
+ device_id = bf.local_rank() if bf.nccl_built() else bf.local_rank() % torch.cuda.device_count()
+ torch.cuda.set_device(device_id)
torch.cuda.manual_seed(args.seed)


Expand Down
3 changes: 2 additions & 1 deletion examples/pytorch_resnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,8 @@
if args.cuda:
print("using cuda.")
# Bluefog: pin GPU to local rank.
- torch.cuda.set_device(bf.local_rank())
+ device_id = bf.local_rank() if bf.nccl_built() else bf.local_rank() % torch.cuda.device_count()
+ torch.cuda.set_device(device_id)
torch.cuda.manual_seed(args.seed)
else:
print("using cpu")
Expand Down
7 changes: 4 additions & 3 deletions test/torch_ops_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,11 @@ def convert_cpu_fp16_to_fp32(self, *values):

def cast_and_place(self, tensor, dtype):
if dtype.is_cuda:
- if bf.local_size() > torch.cuda.device_count():
+ if bf.nccl_built() and bf.local_size() > torch.cuda.device_count():
raise EnvironmentError(
- "Cannot run number of processes in one machine are more than device count")
- return tensor.cuda(bf.local_rank()).type(dtype)
+ "Cannot run number of processes in one machine more than GPU device count"
+ " in NCCL environment")
+ return tensor.cuda(bf.local_rank() % torch.cuda.device_count()).type(dtype)
return tensor.type(dtype)

def test_broadcast(self):
Expand Down
7 changes: 4 additions & 3 deletions test/torch_win_ops_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,11 @@ def tearDown(self):
@staticmethod
def cast_and_place(tensor, dtype):
if dtype.is_cuda:
- if bf.local_size() > torch.cuda.device_count():
+ if bf.nccl_built() and bf.local_size() > torch.cuda.device_count():
raise EnvironmentError(
- "Cannot run number of processes in one machine are more than device count")
- return tensor.cuda(bf.local_rank()).type(dtype)
+ "Cannot run number of processes in one machine more than GPU device count"
+ " in NCCL environment")
+ return tensor.cuda(bf.local_rank() % torch.cuda.device_count()).type(dtype)
return tensor.type(dtype)

def test_win_create_and_sync_and_free(self):
Expand Down

0 comments on commit 473ec22

Please sign in to comment.