Calibration stream files are added and the script is improved. #6800

Merged
merged 1 commit into from Dec 12, 2014
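For context, the playback script fakes live data-taking by republishing files from an old run under a new run number and lumisection: it rewrites the zero-padded fields after "run" and "ls" in the file name and hard-links the file into the ramdisk instead of copying it. A minimal standalone sketch of that renaming step, using the script's run_padding = 6 and lumi_padding = 4 and a hypothetical merged-stream file name:

run_padding = 6   # width of the run-number field, as in roll_playback.py
lumi_padding = 4  # width of the lumisection field, as in roll_playback.py

def rename_for_playback(fn, new_run, new_lumi):
    # overwrite the zero-padded run number that follows the literal 'run'
    run_start = fn.find('run') + 3
    fn = fn[:run_start] + str(new_run).zfill(run_padding) + fn[run_start + run_padding:]
    # overwrite the zero-padded lumisection that follows the literal 'ls'
    lumi_start = fn.find('ls') + 2
    fn = fn[:lumi_start] + str(new_lumi).zfill(lumi_padding) + fn[lumi_start + lumi_padding:]
    return fn

# hypothetical input name; the script would then do
# os.link(source + '/' + old_fn, destination + '/' + new_fn)
old_fn = 'run228928_ls0001_streamDQM_mrg-example.dat'
print(rename_for_playback(old_fn, 230000, 17))  # run230000_ls0017_streamDQM_mrg-example.dat

Because the "copy" is a hard link, the new directory entry points at the same data as the source file, so replaying a run does not duplicate the payload on the ramdisk.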
190 changes: 88 additions & 102 deletions DQM/Integration/scripts/roll_playback.py
@@ -1,7 +1,9 @@
 #!/usr/bin/env python2
 
-# TODO: automatically determine from which LS to start (currently this is hard-coded to 1)
-
+# TODO list
+# - handle the situation where no .jsn of data files are found in the source directory in a better way
+# - automatically determine from which LS to start (currently this is hard-coded to 1)
+# - when dealing with file I/O use the python "file scope"
 
 import os
 import sys
@@ -11,116 +13,80 @@
 import json
 
 
-dat_source = '/fff/ramdisk/playback_files/run224380'
-pb_source = '/fff/ramdisk/playback_files/run225044_pb'
+dat_source = '/fff/ramdisk/playback_files/run228928'
+pb_source = '/fff/ramdisk/playback_files/run228928'
+calib_source = '/fff/ramdisk/playback_files/run228928'
 
 destination = '/fff/ramdisk'
 lumi_len = 23 # in seconds
 run_padding = 6
 lumi_padding = 4
-files_copied_buffer_len = 20 # the number of file to keep in the destination directory
-
-
-def dat_sanity_check(dat_source):
-    dat_jsn_files = []
-    dat_files = []
-    dat_run_number = None
-
-    # find the dat json files
-    files = os.listdir(dat_source)
-    dat_jsn_pattern = re.compile(r'run([0-9]+)_ls([0-9]+)_streamDQM_StorageManager.jsn')
-    dat_jsn_files = sorted(filter(lambda x: dat_jsn_pattern.match(x), files))
-    if len(dat_jsn_files) < 1:
-        print('No dat json files are found in "{0}"'.format(dat_source))
-        return False, dat_jsn_files, dat_files, dat_run_number
-
-    # check if the dat files exist
-    jsn_files_tobe_removed = []
-    for jsn_file in dat_jsn_files:
-        dat_file = jsn_file.replace('.jsn','.dat')
-        if not os.path.exists(dat_source + '/' + dat_file):
-            print('The dat file {0} does NOT exist! Removing the corresponding json file.'.format(dat_file))
-            jsn_files_tobe_removed.append(jsn_file)
-
-    # remove the json files that don't have corresponding dat file
-    dat_jsn_files = [x for x in dat_jsn_files if x not in jsn_files_tobe_removed]
-
-    # create a list of dat files
-    dat_files = map(lambda x: x.replace('.jsn','.dat'), dat_jsn_files)
-
-    dat_run_number = int(dat_jsn_pattern.match(dat_jsn_files[0]).group(1))
-    # check for run_number consistency
-    for i in range(1,len(dat_jsn_files)):
-        run_number_current = int(dat_jsn_pattern.match(dat_jsn_files[i]).group(1))
-        if run_number_current != dat_run_number:
-            print('Non consistent run numbers: "{0}" - expected, "{1}" - found'.format(run_nummber, run_nummber_current))
-            print('\t "{0}" - will be used as a run number'.format(run_nummber))
-
-    return True, dat_jsn_files, dat_files, dat_run_number
+files_copied_buffer_len = 60 # the number of file to keep in the ramdisk
+run_switch_interval = 90 # in seconds
+
+lumi_skip_length = 10
+
+file_types = { 'general_files': {'extension':'.dat', 're_pattern':r'run([0-9]+)_ls([0-9]+)_streamDQM_mrg-[A-Za-z0-9-]+\.jsn'},
+               'hlt_pb_files': {'extension':'.pb', 're_pattern':r'run([0-9]+)_ls([0-9]+)_streamDQMHistograms_mrg-[A-Za-z0-9-]+\.jsn'},
+               'calib_files': {'extension':'.dat', 're_pattern':r'run([0-9]+)_ls([0-9]+)_streamDQMCalibration_mrg-[A-Za-z0-9-]+\.jsn'}, }
 
 
-def pb_sanity_check(pb_source):
-    pb_jsn_files = []
-    pb_files = []
-    pb_run_number = None
+def sanity_check(source, file_info):
+    jsn_files = []
+    data_files = []
+    run_number = None
 
-    # find the pb json files
-    files = os.listdir(pb_source)
-    pb_jsn_pattern = re.compile(r'run([0-9]+)_ls([0-9]+)_streamDQMHistograms_StorageManager.jsn')
-    pb_jsn_files = sorted(filter(lambda x: pb_jsn_pattern.match(x), files))
+    # find the json files that match the given pattern
+    files = os.listdir(source)
+    jsn_pattern = re.compile(file_info['re_pattern'])
+    jsn_files = sorted(filter(lambda x: jsn_pattern.match(x), files))
 
-    # check if the pb files exist
+    # check if the data files exist
     jsn_files_tobe_removed = []
-    for jsn_file in pb_jsn_files:
-        pb_file = jsn_file.replace('.jsn','.pb')
-        if not os.path.exists(pb_source + '/' + pb_file):
-            print('The pb file {0} does NOT exist! Removing the corresponding json file.'.format(pb_file))
+    for jsn_file in jsn_files:
+        data_file = jsn_file.replace('.jsn', file_info['extension'])
+        if os.path.exists(source + '/' + data_file):
+            data_files.append(data_file)
+        else:
+            print('The data file {0} does NOT exist! Removing the corresponding json file.'.format(data_file))
             jsn_files_tobe_removed.append(jsn_file)
 
-    # remove the json files that don't have corresponding pb file
-    pb_jsn_files = [x for x in pb_jsn_files if x not in jsn_files_tobe_removed]
+    # remove the json files that don't have corresponding data file
+    jsn_files = [x for x in jsn_files if x not in jsn_files_tobe_removed]
 
-    if len(pb_jsn_files) < 1:
-        print('No pb json files are found in "{0}"'.format(pb_source))
-        return False, pb_jsn_files, pb_files, pb_run_number
-
-    # create a list of pb files
-    pb_files = map(lambda x: x.replace('.jsn','.pb'), pb_jsn_files)
-
-    pb_run_number = int(pb_jsn_pattern.match(pb_jsn_files[0]).group(1))
+    run_number = int(jsn_pattern.match(jsn_files[0]).group(1))
     # check for run_number consistency
-    for i in range(1,len(pb_jsn_files)):
-        run_number_current = int(pb_jsn_pattern.match(pb_jsn_files[i]).group(1))
-        if run_number_current != pb_run_number:
+    for i in range(1,len(jsn_files)):
+        run_number_current = int(jsn_pattern.match(jsn_files[i]).group(1))
+        if run_number_current != run_number:
            print('Non consistent run numbers: "{0}" - expected, "{1}" - found'.format(run_nummber, run_nummber_current))
            print('\t "{0}" - will be used as a run number'.format(run_nummber))
 
-    return True, pb_jsn_files, pb_files, pb_run_number
+    return True, jsn_files, data_files, run_number
 
 
-def copy_next_lumi(jsn_files, files, run_number, current_lumi, source, destination):
-    assert(len(jsn_files) == len(files))
-
-    index = current_lumi % len(jsn_files)
+def copy_next_lumi(jsn_file, file, run_number, current_lumi, source, destination, copy_file=True):
+    index = current_lumi % len(jsn_file)
 
     # copy the file
-    input_fn = source + '/' + files[index]
-    output_fn = files[index]
+    input_fn = source + '/' + file
+    output_fn = file
     run_start = output_fn.find('run') + 3
     output_fn = output_fn[:run_start] + str(run_number).zfill(run_padding) + output_fn[run_start + run_padding:]
     lumi_start = output_fn.find('ls') + 2
     output_fn = destination + '/' + output_fn[:lumi_start] + str(current_lumi).zfill(lumi_padding) + output_fn[lumi_start + lumi_padding:]
-    os.link(input_fn, output_fn) # instead of copying the file create a hard link
-    print(input_fn + ' -> ' + output_fn)
+    if copy_file:
+        os.link(input_fn, output_fn) # instead of copying the file create a hard link
+        print(input_fn + ' -> ' + output_fn)
 
-    # modyfy and copy the json file
-    input_jsn_fn = source + '/' + jsn_files[index]
+    # load the original json contents
+    input_jsn_fn = source + '/' + jsn_file
     input_jsn = open(input_jsn_fn, 'r')
     jsn_data = json.load(input_jsn)
     input_jsn.close()
 
     # generate the output jsn file name
-    output_jsn_fn = jsn_files[index]
+    output_jsn_fn = jsn_file
     run_start = output_jsn_fn.find('run') + 3
     output_jsn_fn = output_jsn_fn[:run_start] + str(run_number).zfill(run_padding) + output_jsn_fn[run_start + run_padding:]
     lumi_start = output_jsn_fn.find('ls') + 2
@@ -136,37 +102,57 @@ def copy_next_lumi(jsn_files, files, run_number, current_lumi, source, destinati

     print(input_jsn_fn + ' -> ' + output_jsn_fn)
 
-    return output_jsn_fn, output_fn
+    return (output_jsn_fn, output_fn) if copy_file else (output_jsn_fn, )
 
 
 if __name__ == '__main__':
-    dat_dir_ok, dat_jsn_files, dat_files, dat_run_number = dat_sanity_check(dat_source)
-    pb_dir_ok, pb_jsn_files, pb_files, pb_run_number = pb_sanity_check(pb_source)
+    dat_dir_ok, dat_jsn_files, dat_files, run_number = sanity_check(dat_source, file_types['general_files'])
+    pb_dir_ok, pb_jsn_files, pb_files, pb_run_number = sanity_check(pb_source, file_types['hlt_pb_files'])
+    calib_dir_ok, calib_jsn_files, calib_files, calib_run_number = sanity_check(calib_source, file_types['calib_files'])
 
-    if dat_dir_ok and pb_dir_ok:
-        run_number = int(dat_run_number)
-        if run_number != int(pb_run_number):
-            print('The dat run number "{0}" differs from the PB run number "{1}".'.format(run_number, pb_run_number))
-            print('"{0}" is going to be used as a run number.'.format(run_number))
+    if dat_dir_ok and pb_dir_ok and calib_dir_ok:
+        if (run_number != pb_run_number) or (run_number != calib_run_number):
+            print('The DAT run number differs from the PB or Calibration run number.')
+            print('"{0}" is going to be used as a run number. \n'.format(run_number))
 
-        output_dir = destination + '/' + 'run' + str(dat_run_number).zfill(run_padding)
-        if not os.path.exists(output_dir): os.mkdir(output_dir)
-
-        time.sleep(1) # a hack in order python inotify to work correctly
-
-        current_lumi = 1
+        run_length = len(dat_jsn_files)
+        lumi_skip_at = None
+        copy_file = True
+        if run_length > 25:
+            lumi_skip_at = run_length/10
+
         files_copied = []
 
         while True:
-            files_copied += copy_next_lumi(dat_jsn_files, dat_files, run_number, current_lumi, dat_source, output_dir)
-
-            files_copied += copy_next_lumi(pb_jsn_files, pb_files, run_number, current_lumi, pb_source, output_dir)
-
-            print('******************************************************************************************')
-            print('')
-
-            while files_copied_buffer_len < len(files_copied):
-                os.remove(files_copied.pop(0))
-
-            current_lumi += 1
-            time.sleep(lumi_len)
+            output_dir = destination + '/' + 'run' + str(run_number).zfill(run_padding)
+            os.mkdir(output_dir)
+            time.sleep(1) # a hack in order python inotify to work correctly
+
+            current_lumi = 1
+            for i in range(len(dat_jsn_files)):
+                files_copied += copy_next_lumi(dat_jsn_files[i], dat_files[i], run_number, current_lumi, dat_source, output_dir, copy_file)
+
+                j = i%len(pb_jsn_files)
+                files_copied += copy_next_lumi(pb_jsn_files[j], pb_files[j], run_number, current_lumi, pb_source, output_dir, copy_file)
+
+                k = i%len(calib_jsn_files)
+                files_copied += copy_next_lumi(calib_jsn_files[k], calib_files[k], run_number, current_lumi, calib_source, output_dir, copy_file)
+
+                if not lumi_skip_at or (current_lumi != lumi_skip_at): current_lumi += 1
+                else: current_lumi += lumi_skip_length
+
+                if not lumi_skip_at or (current_lumi < 2*lumi_skip_at) or (current_lumi > 2*lumi_skip_at+lumi_skip_length): copy_file = True
+                else: copy_file = False
+
+                time.sleep(lumi_len)
+
+                # clear some of the old files
+                while files_copied_buffer_len < len(files_copied):
+                    os.remove(files_copied.pop(0))
+
+            run_number += 1
+            print('\n\n')
+            time.sleep(run_switch_interval)
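The generalised sanity_check above pairs every .jsn file with its data file and pulls the run and lumisection numbers out of the file name using the regular expressions in file_types. A small sketch of that matching, with the patterns copied from the diff and a made-up merger host suffix in the example file name:

import re

# patterns as defined in the script's file_types table
file_types = { 'general_files': {'extension':'.dat', 're_pattern':r'run([0-9]+)_ls([0-9]+)_streamDQM_mrg-[A-Za-z0-9-]+\.jsn'},
               'hlt_pb_files': {'extension':'.pb', 're_pattern':r'run([0-9]+)_ls([0-9]+)_streamDQMHistograms_mrg-[A-Za-z0-9-]+\.jsn'},
               'calib_files': {'extension':'.dat', 're_pattern':r'run([0-9]+)_ls([0-9]+)_streamDQMCalibration_mrg-[A-Za-z0-9-]+\.jsn'}, }

# hypothetical .jsn name; the 'mrg-c2f13-37-03' suffix is invented for illustration
jsn_file = 'run228928_ls0021_streamDQMCalibration_mrg-c2f13-37-03.jsn'

for name, info in file_types.items():
    match = re.compile(info['re_pattern']).match(jsn_file)
    if match:
        run_number = int(match.group(1))
        lumi = int(match.group(2))
        data_file = jsn_file.replace('.jsn', info['extension'])
        print('{0}: run {1}, ls {2}, data file {3}'.format(name, run_number, lumi, data_file))
        # -> calib_files: run 228928, ls 21, data file run228928_ls0021_streamDQMCalibration_mrg-c2f13-37-03.dat

Only the calib_files pattern matches this name, so the file would be paired with the .dat of the same basename, which is what sanity_check does when it drops orphan .jsn files and then checks run-number consistency.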