From ec2aab8e97b3ac6a8350fdcf3cf84ee3a3400cce Mon Sep 17 00:00:00 2001 From: Brandon Walker <43654521+misterbrandonwalker@users.noreply.github.com> Date: Wed, 20 Sep 2023 07:40:26 -0500 Subject: [PATCH] Fix pickling error when jobstate file doesn't exist and fix threading error when lock file exists then disappears (#4575) Co-authored-by: Brandon Walker Co-authored-by: Adam Novak --- src/toil/fileStores/nonCachingFileStore.py | 4 ++++ src/toil/lib/threading.py | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/toil/fileStores/nonCachingFileStore.py b/src/toil/fileStores/nonCachingFileStore.py index 3fd08e4fcc..ccf87f7967 100644 --- a/src/toil/fileStores/nonCachingFileStore.py +++ b/src/toil/fileStores/nonCachingFileStore.py @@ -303,6 +303,10 @@ def _getAllJobStates(cls, coordination_dir: str) -> Iterator[Dict[str, str]]: # This is a FileNotFoundError. # job finished & deleted its jobState file since the jobState files were discovered continue + elif e.errno == 5: + # This is a OSError: [Errno 5] Input/output error (jobStatefile seems to disappear + # on network file system sometimes) + continue else: raise diff --git a/src/toil/lib/threading.py b/src/toil/lib/threading.py index 1ebcf4ff86..72166cf0c7 100644 --- a/src/toil/lib/threading.py +++ b/src/toil/lib/threading.py @@ -533,7 +533,12 @@ def leave(self) -> Iterator[bool]: # There is someone claiming to be here. Are they alive? full_path = os.path.join(self.lockfileDir, item) - fd = os.open(full_path, os.O_RDONLY) + try: + fd = os.open(full_path, os.O_RDONLY) + except OSError as e: + # suddenly file doesnt exist on network file system? + continue + try: fcntl.lockf(fd, fcntl.LOCK_SH | fcntl.LOCK_NB) except OSError as e: