Calibration stream files are added and the script is improved. #6800

Merged
merged 1 commit into from Dec 12, 2014
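For context, the playback script fakes live data-taking by republishing files from an old run under a new run number and lumisection: it rewrites the zero-padded fields after "run" and "ls" in the file name and hard-links the file into the ramdisk instead of copying it. A minimal standalone sketch of that renaming step, using the script's run_padding = 6 and lumi_padding = 4 and a hypothetical merged-stream file name:

run_padding = 6   # width of the run-number field, as in roll_playback.py
lumi_padding = 4  # width of the lumisection field, as in roll_playback.py

def rename_for_playback(fn, new_run, new_lumi):
    # overwrite the zero-padded run number that follows the literal 'run'
    run_start = fn.find('run') + 3
    fn = fn[:run_start] + str(new_run).zfill(run_padding) + fn[run_start + run_padding:]
    # overwrite the zero-padded lumisection that follows the literal 'ls'
    lumi_start = fn.find('ls') + 2
    fn = fn[:lumi_start] + str(new_lumi).zfill(lumi_padding) + fn[lumi_start + lumi_padding:]
    return fn

# hypothetical input name; the script would then do
# os.link(source + '/' + old_fn, destination + '/' + new_fn)
old_fn = 'run228928_ls0001_streamDQM_mrg-example.dat'
print(rename_for_playback(old_fn, 230000, 17))  # run230000_ls0017_streamDQM_mrg-example.dat

Because the "copy" is a hard link, the new directory entry points at the same data as the source file, so replaying a run does not duplicate the payload on the ramdisk.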
190 changes: 88 additions & 102 deletions DQM/Integration/scripts/roll_playback.py
@@ -1,7 +1,9 @@
 #!/usr/bin/env python2
 
-# TODO: automatically determine from which LS to start (currently this is hard-coded to 1)
-
+# TODO list
+# - handle the situation where no .jsn of data files are found in the source directory in a better way
+# - automatically determine from which LS to start (currently this is hard-coded to 1)
+# - when dealing with file I/O use the python "file scope"
 
 import os
 import sys
@@ -11,116 +13,80 @@
 import json
 
 
-dat_source = '/fff/ramdisk/playback_files/run224380'
-pb_source = '/fff/ramdisk/playback_files/run225044_pb'
+dat_source = '/fff/ramdisk/playback_files/run228928'
+pb_source = '/fff/ramdisk/playback_files/run228928'
+calib_source = '/fff/ramdisk/playback_files/run228928'
 
 destination = '/fff/ramdisk'
 lumi_len = 23 # in seconds
 run_padding = 6
 lumi_padding = 4
-files_copied_buffer_len = 20 # the number of file to keep in the destination directory
-
-
-def dat_sanity_check(dat_source):
-    dat_jsn_files = []
-    dat_files = []
-    dat_run_number = None
-
-    # find the dat json files
-    files = os.listdir(dat_source)
-    dat_jsn_pattern = re.compile(r'run([0-9]+)_ls([0-9]+)_streamDQM_StorageManager.jsn')
-    dat_jsn_files = sorted(filter(lambda x: dat_jsn_pattern.match(x), files))
-    if len(dat_jsn_files) < 1:
-        print('No dat json files are found in "{0}"'.format(dat_source))
-        return False, dat_jsn_files, dat_files, dat_run_number
-
-    # check if the dat files exist
-    jsn_files_tobe_removed = []
-    for jsn_file in dat_jsn_files:
-        dat_file = jsn_file.replace('.jsn','.dat')
-        if not os.path.exists(dat_source + '/' + dat_file):
-            print('The dat file {0} does NOT exist! Removing the corresponding json file.'.format(dat_file))
-            jsn_files_tobe_removed.append(jsn_file)
-
-    # remove the json files that don't have corresponding dat file
-    dat_jsn_files = [x for x in dat_jsn_files if x not in jsn_files_tobe_removed]
-
-    # create a list of dat files
-    dat_files = map(lambda x: x.replace('.jsn','.dat'), dat_jsn_files)
-
-    dat_run_number = int(dat_jsn_pattern.match(dat_jsn_files[0]).group(1))
-    # check for run_number consistency
-    for i in range(1,len(dat_jsn_files)):
-        run_number_current = int(dat_jsn_pattern.match(dat_jsn_files[i]).group(1))
-        if run_number_current != dat_run_number:
-            print('Non consistent run numbers: "{0}" - expected, "{1}" - found'.format(run_nummber, run_nummber_current))
-            print('\t "{0}" - will be used as a run number'.format(run_nummber))
-
-    return True, dat_jsn_files, dat_files, dat_run_number
+files_copied_buffer_len = 60 # the number of file to keep in the ramdisk
+run_switch_interval = 90 # in seconds
+
+lumi_skip_length = 10
+
+file_types = { 'general_files': {'extension':'.dat', 're_pattern':r'run([0-9]+)_ls([0-9]+)_streamDQM_mrg-[A-Za-z0-9-]+\.jsn'},
+               'hlt_pb_files': {'extension':'.pb', 're_pattern':r'run([0-9]+)_ls([0-9]+)_streamDQMHistograms_mrg-[A-Za-z0-9-]+\.jsn'},
+               'calib_files': {'extension':'.dat', 're_pattern':r'run([0-9]+)_ls([0-9]+)_streamDQMCalibration_mrg-[A-Za-z0-9-]+\.jsn'}, }
 
 
-def pb_sanity_check(pb_source):
-    pb_jsn_files = []
-    pb_files = []
-    pb_run_number = None
+def sanity_check(source, file_info):
+    jsn_files = []
+    data_files = []
+    run_number = None
 
-    # find the pb json files
-    files = os.listdir(pb_source)
-    pb_jsn_pattern = re.compile(r'run([0-9]+)_ls([0-9]+)_streamDQMHistograms_StorageManager.jsn')
-    pb_jsn_files = sorted(filter(lambda x: pb_jsn_pattern.match(x), files))
+    # find the json files that match the given pattern
+    files = os.listdir(source)
+    jsn_pattern = re.compile(file_info['re_pattern'])
+    jsn_files = sorted(filter(lambda x: jsn_pattern.match(x), files))
 
-    # check if the pb files exist
+    # check if the data files exist
     jsn_files_tobe_removed = []
-    for jsn_file in pb_jsn_files:
-        pb_file = jsn_file.replace('.jsn','.pb')
-        if not os.path.exists(pb_source + '/' + pb_file):
-            print('The pb file {0} does NOT exist! Removing the corresponding json file.'.format(pb_file))
+    for jsn_file in jsn_files:
+        data_file = jsn_file.replace('.jsn', file_info['extension'])
+        if os.path.exists(source + '/' + data_file):
+            data_files.append(data_file)
+        else:
+            print('The data file {0} does NOT exist! Removing the corresponding json file.'.format(data_file))
             jsn_files_tobe_removed.append(jsn_file)
 
-    # remove the json files that don't have corresponding pb file
-    pb_jsn_files = [x for x in pb_jsn_files if x not in jsn_files_tobe_removed]
+    # remove the json files that don't have corresponding data file
+    jsn_files = [x for x in jsn_files if x not in jsn_files_tobe_removed]
 
-    if len(pb_jsn_files) < 1:
-        print('No pb json files are found in "{0}"'.format(pb_source))
-        return False, pb_jsn_files, pb_files, pb_run_number
-
-    # create a list of pb files
-    pb_files = map(lambda x: x.replace('.jsn','.pb'), pb_jsn_files)
-
-    pb_run_number = int(pb_jsn_pattern.match(pb_jsn_files[0]).group(1))
+    run_number = int(jsn_pattern.match(jsn_files[0]).group(1))
     # check for run_number consistency
-    for i in range(1,len(pb_jsn_files)):
-        run_number_current = int(pb_jsn_pattern.match(pb_jsn_files[i]).group(1))
-        if run_number_current != pb_run_number:
+    for i in range(1,len(jsn_files)):
+        run_number_current = int(jsn_pattern.match(jsn_files[i]).group(1))
+        if run_number_current != run_number:
            print('Non consistent run numbers: "{0}" - expected, "{1}" - found'.format(run_nummber, run_nummber_current))
            print('\t "{0}" - will be used as a run number'.format(run_nummber))
 
-    return True, pb_jsn_files, pb_files, pb_run_number
+    return True, jsn_files, data_files, run_number
 
 
-def copy_next_lumi(jsn_files, files, run_number, current_lumi, source, destination):
-    assert(len(jsn_files) == len(files))
-
-    index = current_lumi % len(jsn_files)
+def copy_next_lumi(jsn_file, file, run_number, current_lumi, source, destination, copy_file=True):
+    index = current_lumi % len(jsn_file)
 
     # copy the file
-    input_fn = source + '/' + files[index]
-    output_fn = files[index]
+    input_fn = source + '/' + file
+    output_fn = file
     run_start = output_fn.find('run') + 3
     output_fn = output_fn[:run_start] + str(run_number).zfill(run_padding) + output_fn[run_start + run_padding:]
     lumi_start = output_fn.find('ls') + 2
     output_fn = destination + '/' + output_fn[:lumi_start] + str(current_lumi).zfill(lumi_padding) + output_fn[lumi_start + lumi_padding:]
-    os.link(input_fn, output_fn) # instead of copying the file create a hard link
-    print(input_fn + ' -> ' + output_fn)
+    if copy_file:
+        os.link(input_fn, output_fn) # instead of copying the file create a hard link
+        print(input_fn + ' -> ' + output_fn)
 
-    # modyfy and copy the json file
-    input_jsn_fn = source + '/' + jsn_files[index]
+    # load the original json contents
+    input_jsn_fn = source + '/' + jsn_file
     input_jsn = open(input_jsn_fn, 'r')
     jsn_data = json.load(input_jsn)
     input_jsn.close()
 
     # generate the output jsn file name
-    output_jsn_fn = jsn_files[index]
+    output_jsn_fn = jsn_file
     run_start = output_jsn_fn.find('run') + 3
     output_jsn_fn = output_jsn_fn[:run_start] + str(run_number).zfill(run_padding) + output_jsn_fn[run_start + run_padding:]
     lumi_start = output_jsn_fn.find('ls') + 2
@@ -136,37 +102,57 @@ def copy_next_lumi(jsn_files, files, run_number, current_lumi, source, destinati

     print(input_jsn_fn + ' -> ' + output_jsn_fn)
 
-    return output_jsn_fn, output_fn
+    return (output_jsn_fn, output_fn) if copy_file else (output_jsn_fn, )
 
 
 if __name__ == '__main__':
-    dat_dir_ok, dat_jsn_files, dat_files, dat_run_number = dat_sanity_check(dat_source)
-    pb_dir_ok, pb_jsn_files, pb_files, pb_run_number = pb_sanity_check(pb_source)
+    dat_dir_ok, dat_jsn_files, dat_files, run_number = sanity_check(dat_source, file_types['general_files'])
+    pb_dir_ok, pb_jsn_files, pb_files, pb_run_number = sanity_check(pb_source, file_types['hlt_pb_files'])
+    calib_dir_ok, calib_jsn_files, calib_files, calib_run_number = sanity_check(calib_source, file_types['calib_files'])
 
-    if dat_dir_ok and pb_dir_ok:
-        run_number = int(dat_run_number)
-        if run_number != int(pb_run_number):
-            print('The dat run number "{0}" differs from the PB run number "{1}".'.format(run_number, pb_run_number))
-            print('"{0}" is going to be used as a run number.'.format(run_number))
+    if dat_dir_ok and pb_dir_ok and calib_dir_ok:
+        if (run_number != pb_run_number) or (run_number != calib_run_number):
+            print('The DAT run number differs from the PB or Calibration run number.')
+            print('"{0}" is going to be used as a run number. \n'.format(run_number))
 
-        output_dir = destination + '/' + 'run' + str(dat_run_number).zfill(run_padding)
-        if not os.path.exists(output_dir): os.mkdir(output_dir)
-
-        time.sleep(1) # a hack in order python inotify to work correctly
-
-        current_lumi = 1
+        run_length = len(dat_jsn_files)
+        lumi_skip_at = None
+        copy_file = True
+        if run_length > 25:
+            lumi_skip_at = run_length/10
+
         files_copied = []
 
         while True:
-            files_copied += copy_next_lumi(dat_jsn_files, dat_files, run_number, current_lumi, dat_source, output_dir)
-
-            files_copied += copy_next_lumi(pb_jsn_files, pb_files, run_number, current_lumi, pb_source, output_dir)
-
-            print('******************************************************************************************')
-            print('')
-
-            while files_copied_buffer_len < len(files_copied):
-                os.remove(files_copied.pop(0))
-
-            current_lumi += 1
-            time.sleep(lumi_len)
+            output_dir = destination + '/' + 'run' + str(run_number).zfill(run_padding)
+            os.mkdir(output_dir)
+            time.sleep(1) # a hack in order python inotify to work correctly
+
+            current_lumi = 1
+            for i in range(len(dat_jsn_files)):
+                files_copied += copy_next_lumi(dat_jsn_files[i], dat_files[i], run_number, current_lumi, dat_source, output_dir, copy_file)
+
+                j = i%len(pb_jsn_files)
+                files_copied += copy_next_lumi(pb_jsn_files[j], pb_files[j], run_number, current_lumi, pb_source, output_dir, copy_file)
+
+                k = i%len(calib_jsn_files)
+                files_copied += copy_next_lumi(calib_jsn_files[k], calib_files[k], run_number, current_lumi, calib_source, output_dir, copy_file)
+
+                if not lumi_skip_at or (current_lumi != lumi_skip_at): current_lumi += 1
+                else: current_lumi += lumi_skip_length
+
+                if not lumi_skip_at or (current_lumi < 2*lumi_skip_at) or (current_lumi > 2*lumi_skip_at+lumi_skip_length): copy_file = True
+                else: copy_file = False
+
+                time.sleep(lumi_len)
+
+                # clear some of the old files
+                while files_copied_buffer_len < len(files_copied):
+                    os.remove(files_copied.pop(0))
+
+            run_number += 1
+            print('\n\n')
+            time.sleep(run_switch_interval)
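The generalised sanity_check above pairs every .jsn file with its data file and pulls the run and lumisection numbers out of the file name using the regular expressions in file_types. A small sketch of that matching, with the patterns copied from the diff and a made-up merger host suffix in the example file name:

import re

# patterns as defined in the script's file_types table
file_types = { 'general_files': {'extension':'.dat', 're_pattern':r'run([0-9]+)_ls([0-9]+)_streamDQM_mrg-[A-Za-z0-9-]+\.jsn'},
               'hlt_pb_files': {'extension':'.pb', 're_pattern':r'run([0-9]+)_ls([0-9]+)_streamDQMHistograms_mrg-[A-Za-z0-9-]+\.jsn'},
               'calib_files': {'extension':'.dat', 're_pattern':r'run([0-9]+)_ls([0-9]+)_streamDQMCalibration_mrg-[A-Za-z0-9-]+\.jsn'}, }

# hypothetical .jsn name; the 'mrg-c2f13-37-03' suffix is invented for illustration
jsn_file = 'run228928_ls0021_streamDQMCalibration_mrg-c2f13-37-03.jsn'

for name, info in file_types.items():
    match = re.compile(info['re_pattern']).match(jsn_file)
    if match:
        run_number = int(match.group(1))
        lumi = int(match.group(2))
        data_file = jsn_file.replace('.jsn', info['extension'])
        print('{0}: run {1}, ls {2}, data file {3}'.format(name, run_number, lumi, data_file))
        # -> calib_files: run 228928, ls 21, data file run228928_ls0021_streamDQMCalibration_mrg-c2f13-37-03.dat

Only the calib_files pattern matches this name, so the file would be paired with the .dat of the same basename, which is what sanity_check does when it drops orphan .jsn files and then checks run-number consistency.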