Commit

I think we can write condor dag file now

scottcoughlin2014 committed Oct 30, 2018
1 parent 0e4951d commit 1f41d9e
Showing 3 changed files with 105 additions and 20 deletions.

bin/promote_users: 13 changes (8 additions, 5 deletions)

@@ -92,11 +92,14 @@ if args.update_website:
     project = Project.find(slug='zooniverse/gravity-spy')

     def update_settings(x):
-        user = User.find(x.userID)
-        new_settings = {"workflow_id": "{0}".format(level_dict[x.curr_level - 1])}
-        print(user)
-        print(new_settings)
-        ProjectPreferences.save_settings(project=project, user=user, settings=new_settings)
+        try:
+            user = User.find(x.userID)
+            new_settings = {"workflow_id": "{0}".format(level_dict[x.curr_level - 1])}
+            print(user)
+            print(new_settings)
+            ProjectPreferences.save_settings(project=project, user=user, settings=new_settings)
+        except:
+            print('This user promotion failed: {0}'.format(x.userID))

     updates.apply(update_settings, axis=1)

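The change above wraps each row's Panoptes calls in try/except, so a single failing user is logged and skipped instead of aborting the whole pandas apply pass. A minimal runnable sketch of that pattern, with a hypothetical promote() standing in for the User/ProjectPreferences calls:

import pandas as pd

def promote(user_id):
    # hypothetical stand-in for User.find + ProjectPreferences.save_settings
    if user_id < 0:
        raise ValueError('unknown user')
    print('promoted user {0}'.format(user_id))

def update_settings(x):
    try:
        promote(x.userID)
    except Exception:
        print('This user promotion failed: {0}'.format(x.userID))

updates = pd.DataFrame({'userID': [1, -2, 3]})
updates.apply(update_settings, axis=1)
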
bin/wscan: 35 changes (20 additions, 15 deletions)

@@ -47,12 +47,12 @@ def parse_commandline():
                         required=True)
     parser.add_argument("--path-to-semantic-file",
                         help="Path to name of similarity model",
-                        required=True)
+                        default=None)
     parser.add_argument("--project-info-pickle",
                         help="This pickle file holds information"
                              "about what workflows a image with what"
                              "confidence label should go into",
-                        required=True)
+                        default=None)
     parser.add_argument("--hdf5", action="store_true", default=False,
                         help="Store triggers in local hdf5 table format")
     parser.add_argument("--sql", action="store_true", default=False,
@@ -70,14 +70,16 @@ def parse_commandline():
 ##########################                ################################
 ###############################################################################

-def main(channel_name, frametype, event_time, project_info_pickle,
-         gid, plot_directory, path_to_cnn, path_to_similarity_search,
+def main(channel_name, frametype, event_time, gid, plot_directory,
+         path_to_cnn, project_info_pickle=None, path_to_similarity_search=None,
          gravityspy_id=True, hdf5=False, sql=False, verbose=False):

     if not os.path.isfile(path_to_cnn):
         raise ValueError('The provided CNN model does not '
                          'exist.')
-    if not os.path.isfile(path_to_similarity_search):
+
+    if ((path_to_similarity_search is not None) and
+            (not os.path.isfile(path_to_similarity_search))):
         raise ValueError('The provided similarity model does not '
                          'exist.')

@@ -123,10 +125,12 @@ def main(channel_name, frametype, event_time, project_info_pickle,
                                   id_string=idstring,
                                   frametype=frametype, plot_directory=plot_directorytmp)

-    results.determine_workflow_and_subjectset(project_info_pickle)
+    if project_info_pickle is not None:
+        results.determine_workflow_and_subjectset(project_info_pickle)

-    features = utils.get_features(plot_directory=plot_directorytmp,
-                                  path_to_semantic_model=path_to_similarity_search)
+    if path_to_similarity_search is not None:
+        features = utils.get_features(plot_directory=plot_directorytmp,
+                                      path_to_semantic_model=path_to_similarity_search)

     # Create directory called "Classified" were images that were successfully classified go.
     final_path = os.path.join(plot_directory, 'Classified')
@@ -135,15 +139,17 @@ def main(channel_name, frametype, event_time, project_info_pickle,
         os.makedirs(final_path)

     if sql:
-        features.to_sql(table='updated_similarity_index')
+        if path_to_similarity_search is not None:
+            features.to_sql(table='updated_similarity_index')
         results.to_sql()
     elif hdf5:
         results.convert_unicode_to_bytestring()
-        features.convert_unicode_to_bytestring()
-        features.write(os.path.join(final_path, 'features.hdf5'),
-                       path='/{0}'.format(idstring), format='hdf5')
         results.write(os.path.join(final_path, 'classification.hdf5'),
                       path='/{0}'.format(idstring), format='hdf5')
+        if path_to_similarity_search is not None:
+            features.convert_unicode_to_bytestring()
+            features.write(os.path.join(final_path, 'features.hdf5'),
+                           path='/{0}'.format(idstring), format='hdf5')

     system_call = "mv {0}*.png {1}".format(plot_directorytmp, final_path)
     os.system(system_call)
@@ -152,7 +158,6 @@ def main(channel_name, frametype, event_time, project_info_pickle,
 if __name__ == '__main__':
     args = parse_commandline()
     main(args.channel_name, args.frametype,
-         args.event_time, args.project_info_pickle,
-         args.id, args.plot_directory, args.path_to_cnn_model,
-         args.path_to_semantic_file,
+         args.event_time, args.id, args.plot_directory, args.path_to_cnn_model,
+         args.project_info_pickle, args.path_to_semantic_file,
          args.gravityspy_id, args.hdf5, args.sql, args.verbose)
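
Because --path-to-semantic-file and --project-info-pickle now default to None, the dependent steps are guarded inside main() and a bare classification run needs only the required inputs. A minimal sketch of such a call; the channel, frame type, and path values are illustrative placeholders, not taken from the commit:

# The optional Zooniverse pickle and similarity model stay at their None
# defaults, so determine_workflow_and_subjectset and get_features are skipped.
main(channel_name='L1:GDS-CALIB_STRAIN',
     frametype='L1_HOFT_C00',
     event_time=1126259462.4,
     gid='Glitch123',
     plot_directory='/tmp/gravityspy-plots',
     path_to_cnn='/path/to/cnn-model.h5',
     hdf5=True)
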
gravityspy/table/events.py: 77 changes (77 additions, 0 deletions)

@@ -462,6 +462,83 @@ def get_triggers(cls, start, end, channel,

         return triggers

+    def create_sub(self, channel_name, frame_type,
+                   path_to_cnn, plot_directory,
+                   subfile_name='gravityspy.sub',
+                   accounting_group_user='scott.coughlin',
+                   accounting_group='ligo.dev.o1.detchar.ch_categorization.glitchzoo'):
+        """Create a condor submit file for the wscan jobs
+
+        Parameters:
+            channel_name (str): channel to scan
+            frame_type (str): frame type to read the data from
+            path_to_cnn (str): path to the trained CNN model
+            plot_directory (str): directory the resulting images go to
+        """
+        # Determine location of the wscan executable
+        proc = subprocess.Popen(['which', 'wscan'],
+                                stdin=subprocess.PIPE,
+                                stdout=subprocess.PIPE,
+                                stderr=subprocess.PIPE,
+                                )
+        (path_to_wscan, err) = proc.communicate()
+        if not path_to_wscan:
+            raise ValueError('Cannot locate wscan executable in your path')
+        # decode the bytes `which` returns and strip the trailing newline
+        path_to_wscan = path_to_wscan.decode('utf-8').strip()
+
+        for d in ['logs', 'condor']:
+            if not os.path.isdir(d):
+                os.makedirs(d)
+
+        with open(subfile_name, 'w') as subfile:
+            subfile.write('universe = vanilla\n')
+            subfile.write('executable = {0}\n'.format(path_to_wscan))
+            subfile.write('\n')
+            subfile.write('arguments = "--channel-name {0} '
+                          '--frametype {1} '
+                          '--event-time $(event_time) '
+                          '--plot-directory {2} --hdf5 '
+                          '--path-to-cnn-model {3}"\n'.format(channel_name,
+                                                              frame_type,
+                                                              plot_directory,
+                                                              path_to_cnn))
+            subfile.write('getEnv=True\n')
+            subfile.write('\n')
+            subfile.write('accounting_group_user = {0}\n'.format(accounting_group_user))
+            subfile.write('accounting_group = {0}\n'.format(accounting_group))
+            subfile.write('\n')
+            subfile.write('priority = 0\n')
+            subfile.write('request_memory = 5000\n')
+            subfile.write('\n')
+            subfile.write('error = logs/gravityspy-$(jobNumber).err\n')
+            subfile.write('output = logs/gravityspy-$(jobNumber).out\n')
+            subfile.write('notification = Error\n')
+            subfile.write('queue 1\n')
+
+    def create_dag(self, subfile_name='gravityspy.sub', retry_number=3):
+        """Create a condor dag file to process all events in the table
+
+        Parameters:
+            subfile_name (str, optional): submit file each dag node runs
+            retry_number (int, optional): number of retries per job
+        """
+        with open('gravityspy_{0}_{1}.dag'.format(self['peak_time'].min(),
+                                                  self['peak_time'].max()), 'a+') as dagfile:
+            for peak_time, peak_time_ns, event_id, event_time in zip(self['peak_time'],
+                                                                     self['peak_time_ns'],
+                                                                     self['event_id'],
+                                                                     self['event_time']):
+                # one dag node per event, keyed by a unique job number
+                job_number = '{0}{1}{2}'.format(peak_time, peak_time_ns, event_id)
+                dagfile.write('JOB {0} {1}\n'.format(job_number, subfile_name))
+                dagfile.write('RETRY {0} {1}\n'.format(job_number, retry_number))
+                dagfile.write('VARS {0} jobNumber="{0}" event_time="{1}"'.format(job_number, repr(event_time)))
+                dagfile.write('\n\n')

 def id_generator(x, size=10,
                  chars=(string.ascii_uppercase +
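
Together with the wscan changes, these two methods build a condor workflow straight from a table of triggers. A minimal sketch of the intended flow, assuming the table class defined in events.py is named Events; the query values are placeholders:

from gravityspy.table.events import Events

# fetch triggers for an illustrative GPS window and channel
events = Events.get_triggers(1126259000, 1126260000, 'L1:GDS-CALIB_STRAIN')
# write the shared submit file, then one dag node per event
events.create_sub(channel_name='L1:GDS-CALIB_STRAIN',
                  frame_type='L1_HOFT_C00',
                  path_to_cnn='/path/to/cnn-model.h5',
                  plot_directory='/tmp/gravityspy-plots')
events.create_dag()
# then submit with: condor_submit_dag gravityspy_<start>_<end>.dag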


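For reference, a single-event run of the two writers above produces files roughly like these; the paths, job number, and event time are illustrative:

gravityspy.sub (abridged):
universe = vanilla
executable = /usr/bin/wscan
arguments = "--channel-name L1:GDS-CALIB_STRAIN --frametype L1_HOFT_C00 --event-time $(event_time) --plot-directory /tmp/gravityspy-plots --hdf5 --path-to-cnn-model /path/to/cnn-model.h5"
...
queue 1

gravityspy_1126259462_1126259462.dag:
JOB 1126259462400000000123 gravityspy.sub
RETRY 1126259462400000000123 3
VARS 1126259462400000000123 jobNumber="1126259462400000000123" event_time="1126259462.4"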