Skip to content

Commit

Permalink
Rerun failed fragment generation runs.
Browse files Browse the repository at this point in the history
  • Loading branch information
kalekundert committed Jan 22, 2018
1 parent 1a0c0b6 commit 309a9be
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 6 deletions.
50 changes: 44 additions & 6 deletions pull_into_place/commands/07_setup_design_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
pull_into_place 07_setup_design_fragments <workspace> <round> [options]
Options:
-m, --mem-free=MEM [default: 2]
-m, --mem-free=MEM [default: 10]
The amount of memory (GB) to request from the cluster. Bigger systems
may need more memory, but making large memory requests can make jobs
take much longer to come off the queue (since there may only be a few
Expand All @@ -19,6 +19,14 @@
-d, --dry-run
Print out the command-line that would be used to generate fragments,
but don't actually run it.
-x, --clear
Remove any previously generated fragment files.
Simply rerun this command if some of your fragment generation jobs fail. By
default it will only submit jobs for inputs that are missing valid fragment
files. You can force the fragments to be regenerated from scratch by passing
the '--clear' flag.
"""

import subprocess
Expand All @@ -33,21 +41,51 @@ def main():
workspace = pipeline.ValidatedDesigns(args['<workspace>'], args['<round>'])
workspace.check_paths()
workspace.check_rosetta()
workspace.make_dirs()

# Run the fragment generation script.
# Do this before working out the 'klab_generate_fragments' command, because
# it may affect which inputs are picked.
if args['--clear'] and not args['--dry-run']:
workspace.clear_fragments()

generate_fragments = [
'klab_generate_fragments',
'--loops_file', workspace.loops_path,
'--outdir', workspace.fragments_dir,
'--memfree', args['--mem-free'],
workspace.input_dir,
]
] + pick_inputs(workspace)

if args['--dry-run']:
print ' '.join(generate_fragments)
else:
workspace.make_dirs()
workspace.clear_fragments()
subprocess.call(generate_fragments)

def pick_inputs(workspace):
    """
    Work out what to hand to the fragment generation command as its inputs.

    Each path in ``workspace.input_paths`` is classified by calling
    ``workspace.fragments_missing()`` on it.  If none of the inputs has
    fragments yet, a one-element list holding ``workspace.input_dir`` is
    returned.  Otherwise a summary line is printed and the sorted list of
    inputs that are still missing fragments is returned (this list is empty
    when every input already has fragments).

    This makes it possible to rerun only the jobs that failed.
    """
    # Classify every input exactly once, in the order the workspace lists them.
    status = [
        (input_path, workspace.fragments_missing(input_path))
        for input_path in workspace.input_paths
    ]
    still_missing = set(path for path, is_missing in status if is_missing)
    any_done = any(not is_missing for _, is_missing in status)

    # Nothing has been generated so far: pass the whole directory, which keeps
    # the resulting 'klab_generate_fragments' command short.
    if not any_done:
        return [workspace.input_dir]

    # A single parenthesised argument prints the same text whether 'print' is
    # a statement or a function.
    summary = '{0} of {1} inputs are missing fragments.'.format(
        len(still_missing), len(workspace.input_paths))
    print(summary)

    return sorted(still_missing)




19 changes: 19 additions & 0 deletions pull_into_place/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,25 @@ def fragments_dir(self):
def fragments_tag(self, input_path):
    """
    Return the first four characters of the file name of ``input_path``.

    Any leading directory components are ignored; only the base name is
    used.  Names shorter than four characters are returned whole.
    """
    file_name = os.path.basename(input_path)
    return file_name[:4]

def fragments_missing(self, input_path):
    """
    Report whether the fragments for ``input_path`` are absent or incomplete.

    Fragment output is looked up under ``self.fragments_dir`` using the
    input's tag (see ``fragments_tag``) followed by exactly one more
    character.  Returns True when no such entry exists, or when at least one
    of the matching entries lacks a 'fragment_file_map.json' file; returns
    False only when every match contains that file.
    """
    pattern = os.path.join(
        self.fragments_dir, self.fragments_tag(input_path) + '?')
    matches = glob.glob(pattern)

    # No output at all means nothing has been generated for this input.
    if not matches:
        return True

    # A match without a fragment map indicates a job that died before
    # finishing, so some fragments are missing.
    return any(
        not os.path.exists(os.path.join(match, 'fragment_file_map.json'))
        for match in matches
    )

def fragments_info(self, input_path):
# Typically, there is one output directory for each chain that
# fragments are being generated for.
Expand Down

0 comments on commit 309a9be

Please sign in to comment.