Skip to content

Commit

Permalink
fix error on exiting (#5053)
Browse files Browse the repository at this point in the history
* fix error on exiting

Signed-off-by: daquexian <daquexian566@gmail.com>

* lazily get rank

Signed-off-by: daquexian <daquexian566@gmail.com>

Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
  • Loading branch information
daquexian and oneflow-ci-bot committed May 31, 2021
1 parent e861dd3 commit 5133512
Showing 1 changed file with 13 additions and 4 deletions.
17 changes: 13 additions & 4 deletions oneflow/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,13 @@
del env_util


# NOTE(review): this appears to be the pre-change version shown by the diff
# (leading indentation was lost in this rendering). Its last line only
# references `Sync` and never calls it, so this version performs no sync.
def SyncOnMaster():
if oneflow.distributed.get_rank() == 0:
oneflow._oneflow_internal.eager.single_client.Sync
# Capture the oneflow callables as closure variables so that they can still be
# accessed after `del oneflow`.
def _SyncOnMasterFn(get_rank, sync):
    """Build an exit hook that runs ``sync`` only on the rank-0 process.

    Both callables are passed in explicitly and kept in the closure, so the
    returned hook does not look anything up through the ``oneflow`` name
    when it finally runs.

    Args:
        get_rank: Zero-argument callable returning the rank of this process.
        sync: Zero-argument callable that performs the synchronization.

    Returns:
        A zero-argument function that calls ``sync()`` when ``get_rank()``
        returns 0 and does nothing otherwise.
    """

    def SyncOnMaster():
        # The rank is queried lazily, when the hook runs, not when it is built.
        if get_rank() == 0:
            sync()

    return SyncOnMaster


atexit.register(oneflow._oneflow_internal.SetShuttingDown)
Expand All @@ -80,7 +84,12 @@ def SyncOnMaster():
# Global<ResourceDesc, ForSession>::Get(), used by vm in background thread,
# will be set to nullptr by TryCloseDefaultSession,
# so sync vm in advance to avoid data race
atexit.register(SyncOnMaster)
# Pass the rank getter and the sync function in as arguments so the registered
# exit hook keeps its own references to them instead of resolving them through
# the `oneflow` name at exit time.
atexit.register(
_SyncOnMasterFn(
oneflow.python.framework.distribute.get_rank,
oneflow._oneflow_internal.eager.single_client.Sync,
)
)
del atexit

import sys
Expand Down

0 comments on commit 5133512

Please sign in to comment.