From 73b9a1fbaaa57dcb42aedfbfd7fbefa810190975 Mon Sep 17 00:00:00 2001 From: daquexian Date: Tue, 24 Aug 2021 18:02:24 +0800 Subject: [PATCH 1/2] skip sync when abnormal exiting Signed-off-by: daquexian --- python/oneflow/__init__.py | 55 ++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/python/oneflow/__init__.py b/python/oneflow/__init__.py index f001b0e32ef..c711f41ae78 100644 --- a/python/oneflow/__init__.py +++ b/python/oneflow/__init__.py @@ -14,6 +14,7 @@ limitations under the License. """ +import sys import collections import oneflow._oneflow_internal @@ -92,19 +93,51 @@ def is_deprecated(func_or_class): del register_python_callback -def _SyncOnMasterFn(): - if not oneflow._oneflow_internal.IsEnvInited(): - return - if oneflow.framework.distribute.is_multi_client(): - oneflow._oneflow_internal.eager.multi_client.Sync() - elif oneflow.framework.distribute.get_rank() == 0: - oneflow._oneflow_internal.eager.single_client.Sync() +class ExitHook: + def __init__(self): + self.exit_code = None + self.exception = None + self._orig_exit = sys.exit + self._orig_excepthook = sys.excepthook -atexit.register(oneflow._oneflow_internal.SetShuttingDown) -atexit.register(oneflow._oneflow_internal.DestroyEnv) -atexit.register(oneflow.framework.session_context.TryCloseDefaultSession) -atexit.register(_SyncOnMasterFn) + def exit(code=0): + self.exit_code = code + self._orig_exit(code) + sys.exit = exit + + def exc_handler(exc_type, exc, *args): + self.exception = exc + self._orig_excepthook(exc_type, exc, *args) + + sys.excepthook = exc_handler + + def is_normal_exit(self): + if self.exit_code is not None: + return self.exit_code == 0 + return self.exception is None + + +hook = ExitHook() + + +def atexit_hook(hook): + if hook.is_normal_exit(): + if oneflow._oneflow_internal.IsEnvInited(): + if oneflow.framework.distribute.is_multi_client(): + oneflow._oneflow_internal.eager.multi_client.Sync() + elif oneflow.framework.distribute.get_rank() == 0: + oneflow._oneflow_internal.eager.single_client.Sync() + oneflow.framework.session_context.TryCloseDefaultSession() + if hook.is_normal_exit(): + oneflow._oneflow_internal.DestroyEnv() + oneflow._oneflow_internal.SetShuttingDown() + + +atexit.register(atexit_hook, hook) +del atexit_hook +del hook +del ExitHook del atexit del oneflow import oneflow.framework.docstr as docstr From dabd5b6772efa3e19ced27d6a7b2ebf75674bdff Mon Sep 17 00:00:00 2001 From: oneflow-ci-bot Date: Tue, 24 Aug 2021 12:12:03 +0000 Subject: [PATCH 2/2] auto format by CI --- python/oneflow/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/oneflow/__init__.py b/python/oneflow/__init__.py index c711f41ae78..ae96ea9d3f6 100644 --- a/python/oneflow/__init__.py +++ b/python/oneflow/__init__.py @@ -104,6 +104,7 @@ def __init__(self): def exit(code=0): self.exit_code = code self._orig_exit(code) + sys.exit = exit def exc_handler(exc_type, exc, *args):