Rerun failed fragment generation runs.

Kortemme-Lab · Jan 22, 2018 · 309a9be · 309a9be
1 parent 1a0c0b6
commit 309a9be
Show file tree

Hide file tree

Showing 2 changed files with 63 additions and 6 deletions.
diff --git a/pull_into_place/commands/07_setup_design_fragments.py b/pull_into_place/commands/07_setup_design_fragments.py
@@ -10,7 +10,7 @@
     pull_into_place 07_setup_design_fragments <workspace> <round> [options]
 
 Options:
-    -m, --mem-free=MEM  [default: 2]
+    -m, --mem-free=MEM  [default: 10]
         The amount of memory (GB) to request from the cluster.  Bigger systems 
         may need more memory, but making large memory requests can make jobs 
         take much longer to come off the queue (since there may only be a few 
@@ -19,6 +19,14 @@
     -d, --dry-run
         Print out the command-line that would be used to generate fragments, 
         but don't actually run it.
+
+    -x, --clear
+        Remove any previously generated fragment files.
+
+Simply rerun this command if some of your fragment generation jobs fail.  By 
+default it will only submit jobs for inputs that are missing valid fragment 
+files.  You can force the fragments to be regenerated from scratch by passing 
+the '--clear' flag.
 """
 
 import subprocess
@@ -33,21 +41,51 @@ def main():
     workspace = pipeline.ValidatedDesigns(args['<workspace>'], args['<round>'])
     workspace.check_paths()
     workspace.check_rosetta()
+    workspace.make_dirs()
 
-    # Run the fragment generation script.
+    # Do this before working out the 'klab_generate_fragments' command, because 
+    # it may affect which inputs are picked.
+    if args['--clear'] and not args['--dry-run']:
+        workspace.clear_fragments()
 
     generate_fragments = [
             'klab_generate_fragments',
             '--loops_file', workspace.loops_path,
             '--outdir', workspace.fragments_dir,
             '--memfree', args['--mem-free'],
-            workspace.input_dir,
-    ]
+    ] +     pick_inputs(workspace)
 
     if args['--dry-run']:
         print ' '.join(generate_fragments)
     else:
-        workspace.make_dirs()
-        workspace.clear_fragments()
         subprocess.call(generate_fragments)
 
+def pick_inputs(workspace):
+    """
+    Return the inputs that 'klab_generate_fragments' still needs to process.
+
+    Each path in `workspace.input_paths` is checked with 
+    `workspace.fragments_missing()`.  If no input has fragments yet, the 
+    result is just `[workspace.input_dir]`, which keeps the command short.  
+    Otherwise a summary is printed and the sorted (possibly empty) list of 
+    paths that are missing fragments is returned.
+    """
+    # Sets, so that a path listed twice is only counted and returned once.
+    missing, complete = set(), set()
+
+    for input_path in workspace.input_paths:
+        is_missing = workspace.fragments_missing(input_path)
+        (missing if is_missing else complete).add(input_path)
+
+    # Nothing has fragments yet: hand back the whole directory instead.
+    if len(complete) == 0:
+        return [workspace.input_dir]
+
+    summary = '{0} of {1} inputs are missing fragments.'
+    print summary.format(len(missing), len(workspace.input_paths))
+
+    return sorted(missing)
+
+
+
+
diff --git a/pull_into_place/pipeline.py b/pull_into_place/pipeline.py
@@ -389,6 +389,25 @@ def fragments_dir(self):
     def fragments_tag(self, input_path):
         return os.path.basename(input_path)[:4]
 
+    def fragments_missing(self, input_path):
+        """
+        Return True if fragments still need to be generated for the given 
+        input: either it has no fragment directories at all, or one of them 
+        lacks a 'fragment_file_map.json' (taken to mean that a job died).
+        """
+        # Fragment directories are named by the input's tag plus exactly one 
+        # more character (hence the '?'), presumably a chain identifier.
+        tag = self.fragments_tag(input_path)
+        output_dirs = glob.glob(os.path.join(self.fragments_dir, tag + '?'))
+
+        if len(output_dirs) == 0:
+            return True
+
+        # any() stops at the first directory whose fragment map is absent.
+        return any(
+            not os.path.exists(os.path.join(x, 'fragment_file_map.json'))
+            for x in output_dirs)
+
     def fragments_info(self, input_path):
         # Typically, there is one output directory for each chain that
         # fragments are being generated for.