diff --git a/megatron/initialize.py b/megatron/initialize.py
index af801efa40..9f3bd421bf 100644
--- a/megatron/initialize.py
+++ b/megatron/initialize.py
@@ -192,13 +192,13 @@ def _initialize_distributed():
             else:
                 args.local_rank = device
             torch.cuda.set_device(device)
-    # Call the init process
-    torch.distributed.init_process_group(
-        backend=args.distributed_backend,
-        world_size=args.world_size,
-        rank=args.rank,
-        timeout=timedelta(minutes=args.distributed_timeout_minutes),
-    )
+        # Call the init process
+        torch.distributed.init_process_group(
+            backend=args.distributed_backend,
+            world_size=args.world_size,
+            rank=args.rank,
+            timeout=timedelta(minutes=args.distributed_timeout_minutes),
+        )

     # Set the tensor model-parallel, pipeline model-parallel, and
     # data-parallel communicators.
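
Note: the removed and added lines are textually identical and appear to differ only in leading whitespace; the hunk seems to re-indent the torch.distributed.init_process_group(...) call so that it runs inside the else branch of the torch.distributed.is_initialized() check, meaning the default process group is only created when one does not already exist (calling init_process_group twice raises a RuntimeError in torch.distributed). A minimal sketch of the post-change control flow, assuming that reading; the init_distributed wrapper, the "nccl" default, and the RANK/WORLD_SIZE environment handling below are illustrative assumptions, not Megatron's actual API:

    import os
    from datetime import timedelta

    import torch


    def init_distributed(backend="nccl", timeout_minutes=10):
        # Hypothetical wrapper mirroring the post-change flow in
        # megatron/initialize.py. Assumes MASTER_ADDR/MASTER_PORT are set,
        # since init_process_group defaults to env:// rendezvous.
        rank = int(os.environ.get("RANK", "0"))
        world_size = int(os.environ.get("WORLD_SIZE", "1"))

        if torch.distributed.is_initialized():
            # A launcher already created the group: reuse it instead of
            # re-initializing, which would raise a RuntimeError.
            rank = torch.distributed.get_rank()
            world_size = torch.distributed.get_world_size()
        else:
            device_count = torch.cuda.device_count()
            if device_count > 0:
                # Bind this rank to a local GPU before creating communicators.
                torch.cuda.set_device(rank % device_count)
            # With the hunk applied, this call sits inside the else branch,
            # so it only runs when no default process group exists yet.
            torch.distributed.init_process_group(
                backend=backend,
                world_size=world_size,
                rank=rank,
                timeout=timedelta(minutes=timeout_minutes),
            )
        return rank, world_size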