New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update the convertToRaw
script
#39047
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,32 +10,79 @@ import socket | |
import subprocess | ||
import sys | ||
|
||
def cmsRun(config: str, verbose: bool, **args):
    """Run `cmsRun` on the given configuration and abort the script on failure.

    The command line is `cmsRun CONFIG KEY=VALUE ...`, with one KEY=VALUE
    argument for every keyword argument passed in `args`. The full command is
    echoed to stdout before it is executed.

    Parameters:
        config:  path of the configuration file passed to cmsRun.
        verbose: if true, the child process inherits stdout and stderr, so its
                 output is shown as it is produced; otherwise the output is
                 captured, and its stderr is replayed only if cmsRun fails.
        **args:  extra KEY=VALUE arguments appended to the command line.

    On a non-zero return code an error message is written to stderr and the
    script exits with the same return code via sys.exit().
    """
    cmd = ['cmsRun', config] + [arg + '=' + str(val) for (arg, val) in args.items()]
    sys.stdout.write(' \\\n '.join(cmd))
    sys.stdout.write('\n\n')
    # make sure the echoed command is printed before any output of the child process
    sys.stdout.flush()

    if verbose:
        status = subprocess.run(cmd, stdout=None, stderr=None)
    else:
        status = subprocess.run(cmd, capture_output=True, text=True)

    if status.returncode == 0:
        return

    # handle error conditions: a negative return code means cmsRun was killed by a signal
    if status.returncode < 0:
        sys.stderr.write('error: cmsRun was killed by signal %d\n' % -status.returncode)
    else:
        sys.stderr.write('error: cmsRun exited with error code %d\n' % status.returncode)
    # in verbose mode stderr was not captured (status.stderr is None) and has already been shown
    if not verbose:
        sys.stderr.write('\n')
        sys.stderr.write(status.stderr)
    sys.exit(status.returncode)
|
||
|
||
class LuminosityBlockRange:
    """Inclusive range of (run, lumisection) pairs, parsed from a string.

    The accepted syntax is `[RUN:]LUMI-[RUN:]LUMI`, a single `[RUN:]LUMI`
    (used as both ends of the range), the keyword `all`, or an empty string.
    A missing field, or the keywords `min` and `max`, are stored as 0, which
    `is_in_range()` treats as "no constraint" for that field.

    Raises ValueError if the string contains more than one '-' or more than
    one ':' per endpoint, or if a field is not an integer.
    """

    def __init__(self, value: str = '') -> None:
        # 0 means "unbounded"; this is also the result for '' and 'all'
        self.min_run = 0
        self.max_run = 0
        self.min_lumi = 0
        self.max_lumi = 0
        if value and value != 'all':
            lower, upper = LuminosityBlockRange.parse_range(value)
            self.min_run, self.min_lumi = lower
            self.max_run, self.max_lumi = upper

    @staticmethod
    def parse_value(value: str) -> int:
        """Convert a single field to an int; '', 'min' and 'max' map to 0."""
        return 0 if value in ('', 'min', 'max') else int(value)

    @staticmethod
    def parse_value_pair(value: str) -> tuple[int, int]:
        """Parse `[RUN:]LUMI` into (run, lumi); a missing run is returned as 0."""
        if value.count(':') > 1:
            raise ValueError('invalid syntax')
        (first, second) = value.split(':') if ':' in value else ('', value)
        return LuminosityBlockRange.parse_value(first), LuminosityBlockRange.parse_value(second)

    @staticmethod
    def parse_range(value: str) -> tuple[tuple[int, int], tuple[int, int]]:
        """Parse `FIRST-LAST` into ((run, lumi), (run, lumi)).

        A value without '-' is used for both ends of the range.
        """
        if value.count('-') > 1:
            raise ValueError('invalid syntax')
        (first, second) = value.split('-') if '-' in value else (value, value)
        return LuminosityBlockRange.parse_value_pair(first), LuminosityBlockRange.parse_value_pair(second)

    def is_in_range(self, run: int, lumi: int) -> bool:
        """Return True if (run, lumi) lies within the range, bounds included."""
        # lower bound: either the run matches (or is unconstrained) and the lumi is
        # high enough, or the run is strictly after the first run of the range
        above_min = (
            (self.min_run == 0 or self.min_run == run) and (self.min_lumi == 0 or self.min_lumi <= lumi)
        ) or (self.min_run != 0 and self.min_run < run)
        # upper bound: the mirror image of the above; the strict comparison must be
        # guarded by max_run (not min_run), otherwise a range with only an upper run
        # bound rejects every run before the last one
        below_max = (
            (self.max_run == 0 or self.max_run == run) and (self.max_lumi == 0 or self.max_lumi >= lumi)
        ) or (self.max_run != 0 and self.max_run > run)
        return above_min and below_max
|
||
|
||
# default values | ||
events_per_file = 50 | ||
events_per_lumi = 11650 | ||
events_per_file = 100 | ||
events_per_lumi = 11655 | ||
output_directory = '' | ||
|
||
parser = argparse.ArgumentParser(description='Convert RAW data from .root format to .raw format.', formatter_class = argparse.ArgumentDefaultsHelpFormatter) | ||
parser.add_argument('files', type=str, metavar='FILES', nargs='+', help='input files in .root format') | ||
parser.add_argument('-o', '--output', type=str, dest='output_directory', metavar='PATH', default='', help='base path to store the output files; subdirectories based on the run number are automatically created') | ||
parser.add_argument('-f', '--events_per_file', type=int, dest='events_per_file', metavar='EVENTS', default=events_per_file, help='split the output into files with at most EVENTS events') | ||
parser.add_argument('-l', '--events_per_lumi', type=int, dest='events_per_lumi', metavar='EVENTS', default=events_per_lumi, help='process at most EVENTS events in each lumisection') | ||
parser.add_argument('-r', '--range', type=LuminosityBlockRange, dest='range', metavar='[RUN:LUMI-RUN:LUMI]', default='all', help='process only the runs and lumisections in the given range') | ||
parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', default=False, help='print additional information while processing the input files') | ||
parser.add_argument('--one-file-per-lumi', action='store_true', dest='one_file_per_lumi', default=False, help='assume that lumisections are not split across files (and disable --events_per_lumi)') | ||
|
||
# parse the command line arguments and options | ||
|
@@ -59,16 +106,24 @@ content = {} | |
for f in files: | ||
|
||
# run edmFileUtil --eventsInLumis ... | ||
output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], capture_output=True, text=True) | ||
print(f'preprocessing input file {f}') | ||
if args.verbose: | ||
output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], stdout=None, stderr=None) | ||
else: | ||
output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], capture_output=True, text=True) | ||
|
||
# handle error conditions | ||
if output.returncode < 0: | ||
sys.stderr.write('error: edmFileUtil was killed by signal %d\n' % -output.returncode) | ||
sys.stderr.write('\n') | ||
sys.stderr.write(output.stderr) | ||
if not args.verbose: | ||
sys.stderr.write('\n') | ||
sys.stderr.write(output.stderr) | ||
sys.exit(output.returncode) | ||
elif output.returncode > 0: | ||
sys.stderr.write('error: edmFileUtil exited with error code %d\n' % output.returncode) | ||
sys.stderr.write('\n') | ||
sys.stderr.write(output.stderr) | ||
if not args.verbose: | ||
sys.stderr.write('\n') | ||
sys.stderr.write(output.stderr) | ||
sys.exit(output.returncode) | ||
|
||
# parse the output of edmFileUtil | ||
|
@@ -86,14 +141,23 @@ for f in files: | |
|
||
if parsing: | ||
run, lumi, events = tuple(map(int, line.split())) | ||
if not args.range.is_in_range(run, lumi): | ||
print(f' run {run}, lumisetion {lumi} is outside of the given range and will be skipped') | ||
continue | ||
if events == 0: | ||
print(f' run {run}, lumisetion {lumi} is empty and will be skipped') | ||
continue | ||
print(f' run {run}, lumisetion {lumi} with {events} events will be processed') | ||
if not run in content: | ||
content[run] = {} | ||
if not lumi in content[run]: | ||
content[run][lumi] = FileInfo() | ||
content[run][lumi].events += events | ||
content[run][lumi].files.add(f) | ||
print() | ||
|
||
# drop empty lumisections | ||
# note: this may no longer be needed, but is left as a cross check | ||
for run in content: | ||
empty_lumis = [ lumi for lumi in content[run] if content[run][lumi].events == 0 ] | ||
for lumi in empty_lumis: | ||
|
@@ -117,6 +181,7 @@ if not os.path.exists(config_py): | |
sys.exit(1) | ||
|
||
# convert the input data to FED RAW data format | ||
converted_files = [] | ||
|
||
# process each run | ||
for run in sorted(content): | ||
|
@@ -130,7 +195,8 @@ for run in sorted(content): | |
# process the whole run | ||
lumis = sorted(content[run]) | ||
print('found run %d, lumis %d-%d, with %d events' % (run, min(lumis), max(lumis), sum(content[run][lumi].events for lumi in lumis))) | ||
cmsRun(config_py, inputFiles = ','.join(files), runNumber = run, eventsPerFile = args.events_per_file, outputPath = args.output_directory) | ||
cmsRun(config_py, args.verbose, inputFiles = ','.join(files), runNumber = run, eventsPerFile = args.events_per_file, outputPath = args.output_directory) | ||
converted_files = glob.glob(run_path + f'/run{run:06d}_ls{lumi:04d}_*.raw') | ||
|
||
else: | ||
# process lumisections individually, then merge the output | ||
|
@@ -147,7 +213,7 @@ for run in sorted(content): | |
lumi_path = args.output_directory + f'/run{run:06d}_ls{lumi:04d}' | ||
shutil.rmtree(lumi_path, ignore_errors=True) | ||
os.makedirs(lumi_path) | ||
cmsRun(config_py, inputFiles = ','.join(content[run][lumi].files), runNumber = run, lumiNumber = lumi, eventsPerLumi = args.events_per_lumi, eventsPerFile = args.events_per_file, outputPath = lumi_path) | ||
cmsRun(config_py, args.verbose, inputFiles = ','.join(content[run][lumi].files), runNumber = run, lumiNumber = lumi, eventsPerLumi = args.events_per_lumi, eventsPerFile = args.events_per_file, outputPath = lumi_path) | ||
|
||
# merge all lumisections data | ||
|
||
|
@@ -174,7 +240,10 @@ for run in sorted(content): | |
# lumisection data and EoLS files | ||
lumi_files = glob.glob(lumi_path + f'/run{run:06d}_ls{lumi:04d}_*') | ||
for f in lumi_files: | ||
shutil.move(f, run_path + '/') | ||
target = run_path + f.removeprefix(lumi_path) | ||
shutil.move(f, target) | ||
if f.endswith('.raw'): | ||
converted_files.append(target) | ||
|
||
# read the partial EoR file | ||
eor_file = lumi_path + f'/run{run:06d}_ls0000_EoR.jsn' | ||
|
@@ -199,7 +268,42 @@ for run in sorted(content): | |
|
||
# write the final EoR file | ||
# implemented by hand instead of using json.dump() to match the style used by the DAQ tools | ||
assert len(converted_files) == summary['data'][1] | ||
eor_file = run_path + f'/run{run:06d}_ls0000_EoR.jsn' | ||
f = open(eor_file, 'w') | ||
f.write('{\n "data" : [ "%d", "%d", "%d", "%d" ],\n "definition" : "%s",\n "source" : "%s"\n}\n' % (summary['data'][0], summary['data'][1], summary['data'][2], summary['data'][3], summary['definition'], summary['source'])) | ||
f.close() | ||
with open(eor_file, 'w') as file: | ||
file.write('{\n "data" : [ "%d", "%d", "%d", "%d" ],\n "definition" : "%s",\n "source" : "%s"\n}\n' % (summary['data'][0], summary['data'][1], summary['data'][2], summary['data'][3], summary['definition'], summary['source'])) | ||
file.close() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I thought with-open-as takes care of closing the file (so
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
It does, but I don't mind doing it explicitly. |
||
|
||
# mark the .raw files as not executable | ||
for f in converted_files: | ||
os.chmod(f, 0o644) | ||
|
||
# write a cff file for processing the converted files | ||
cff_file = args.output_directory + f'/run{run:06d}_cff.py' | ||
with open(cff_file, 'w') as file: | ||
file.write("""import FWCore.ParameterSet.Config as cms | ||
|
||
from EventFilter.Utilities.FedRawDataInputSource_cfi import source as _source | ||
source = _source.clone( | ||
eventChunkSize = 200, # MB | ||
eventChunkBlock = 200, # MB | ||
numBuffers = 4, | ||
maxBufferedFiles = 4, | ||
fileListMode = True, | ||
fileNames = ( | ||
%s | ||
) | ||
) | ||
|
||
from EventFilter.Utilities.EvFDaqDirector_cfi import EvFDaqDirector as _EvFDaqDirector | ||
EvFDaqDirector = _EvFDaqDirector.clone( | ||
buBaseDir = '%s', | ||
runNumber = %d | ||
) | ||
|
||
from EventFilter.Utilities.FastMonitoringService_cfi import FastMonitoringService as _FastMonitoringService | ||
FastMonitoringService = _FastMonitoringService.clone() | ||
""" % ('\n'.join(" '" + f + "'," for f in converted_files), args.output_directory, run)) | ||
file.close() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I thought with-open-as takes care of closing the file (so |
||
|
||
# all done |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just for my education: how was 11655 chosen?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's how many events we can expect an HLT node to process in each lumisection under nominal conditions:
100 kHz / 200 nodes × 23.31 s = 11655 events