Skip to content

Commit

Permalink
Make SavePrimaryDB faster
Browse files Browse the repository at this point in the history
Instead of reading in the entire snapshot file on every poll just to check
that it is non-empty, we now stream it and read only the first byte. The
wait between polls is also reduced from 60 to 10 seconds, so small test
runs complete noticeably faster.

As a side effect of streaming the file, we have to disable Toil caching
entirely: unlike readGlobalFile, readGlobalFileStream doesn't have a
cache=False option.
  • Loading branch information
joelarmstrong committed Jul 6, 2017
1 parent 26fcc44 commit 08694b3
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 6 deletions.
10 changes: 5 additions & 5 deletions src/cactus/pipeline/cactus_workflow.py
Expand Up @@ -300,11 +300,11 @@ def run(self, fileStore):
stopKtserver(dbElem)
# Wait for the file to appear in the right place. This may take a while
while True:
path = fileStore.readGlobalFile(self.cactusWorkflowArguments.snapshotID, cache=False)
stat = os.stat(path)
if stat.st_size > 0:
break
time.sleep(60)
with fileStore.readGlobalFileStream(self.cactusWorkflowArguments.snapshotID) as f:
if f.read(1) != '':
# The file is no longer empty
break
time.sleep(10)
# We have the file now
intermediateResultsUrl = getattr(self.cactusWorkflowArguments, 'intermediateResultsUrl', None)
if intermediateResultsUrl is not None:
Expand Down
7 changes: 6 additions & 1 deletion src/cactus/progressive/cactus_progressive.py
Expand Up @@ -442,7 +442,12 @@ def main():
setLoggingFromOptions(options)

options.cactusDir = os.path.abspath(options.cactusDir)


# Caching generally slows down the cactus workflow. In addition,
# some methods, such as readGlobalFileStream, cannot be forced to
# read directly from the job store instead of from the cache.
options.disableCaching = True

#Create the progressive cactus project
projWrapper = ProjectWrapper(options)
projWrapper.writeXml()
Expand Down

0 comments on commit 08694b3

Please sign in to comment.