Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update the convertToRaw script #39047

Merged
merged 3 commits into from Aug 14, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
136 changes: 120 additions & 16 deletions HLTrigger/Tools/scripts/convertToRaw
Expand Up @@ -10,32 +10,79 @@ import socket
import subprocess
import sys

def cmsRun(config: str, verbose: bool, **args):
    """Run ``cmsRun`` on `config` and terminate the script if it fails.

    Every keyword argument is appended to the command line as ``name=value``.
    The full command is echoed to stdout before it is executed.

    Args:
        config: path of the configuration file passed to ``cmsRun``.
        verbose: when true, the child's stdout and stderr are inherited from
            this process; otherwise they are captured, and the captured
            stderr is written out only if the command fails.
        **args: extra ``name=value`` arguments for ``cmsRun``.

    Raises:
        SystemExit: with the child's return code, if ``cmsRun`` exits with a
            non-zero status or is killed by a signal.
    """
    cmd = ['cmsRun', config] + [f'{arg}={val}' for (arg, val) in args.items()]
    sys.stdout.write(' \\\n '.join(cmd))
    sys.stdout.write('\n\n')

    # run the command exactly once, either streaming or capturing its output
    if verbose:
        status = subprocess.run(cmd, stdout=None, stderr=None)
    else:
        status = subprocess.run(cmd, capture_output=True, text=True)

    if status.returncode == 0:
        return

    # handle error conditions: a negative return code means "killed by signal"
    if status.returncode < 0:
        sys.stderr.write('error: cmsRun was killed by signal %d\n' % -status.returncode)
    else:
        sys.stderr.write('error: cmsRun exited with error code %d\n' % status.returncode)

    # in verbose mode the child's stderr has already been shown and was not captured
    if not verbose:
        sys.stderr.write('\n')
        sys.stderr.write(status.stderr)
    sys.exit(status.returncode)


class LuminosityBlockRange:
    """Inclusive range of (run, lumisection) pairs.

    The range is parsed from a string of the form ``[RUN:]LUMI-[RUN:]LUMI``
    (or a single ``[RUN:]LUMI``, used as both bounds). An empty string or
    ``'all'`` leaves every bound at 0. A bound equal to 0 is treated as
    "no constraint"; the keywords ``min`` and ``max`` (and an empty field)
    are parsed as 0.
    """

    def __init__(self, value: str = '') -> None:
        self.min_run = 0
        self.max_run = 0
        self.min_lumi = 0
        self.max_lumi = 0
        if value and value != 'all':
            ((self.min_run, self.min_lumi), (self.max_run, self.max_lumi)) = LuminosityBlockRange.parse_range(value)

    @staticmethod
    def parse_value(value: str) -> int:
        """Convert a single field to an int; '', 'min' and 'max' map to 0."""
        return 0 if value in ('', 'min', 'max') else int(value)

    @staticmethod
    def parse_value_pair(value: str) -> tuple[int, int]:
        """Parse ``[RUN:]LUMI`` into ``(run, lumi)``; a missing run is 0.

        Raises:
            ValueError: if `value` contains more than one ':' or a field is
                not an integer.
        """
        if value.count(':') > 1:
            raise ValueError('invalid syntax')
        (first, second) = value.split(':') if ':' in value else ('', value)
        return LuminosityBlockRange.parse_value(first), LuminosityBlockRange.parse_value(second)

    @staticmethod
    def parse_range(value: str) -> tuple[tuple[int, int], tuple[int, int]]:
        """Parse ``FIRST-LAST`` into ``((run, lumi), (run, lumi))``.

        A value without '-' is used as both the lower and the upper bound.

        Raises:
            ValueError: if `value` contains more than one '-' or either side
                is not a valid ``[RUN:]LUMI`` pair.
        """
        if value.count('-') > 1:
            raise ValueError('invalid syntax')
        (first, second) = value.split('-') if '-' in value else (value, value)
        return LuminosityBlockRange.parse_value_pair(first), LuminosityBlockRange.parse_value_pair(second)

    def is_in_range(self, run: int, lumi: int) -> bool:
        """Return True if (`run`, `lumi`) lies within the configured bounds."""
        # lower bound: either the run matches (or is unconstrained) and the
        # lumisection is high enough, or the run is strictly above the minimum
        above_min = (
            (self.min_run == 0 or self.min_run == run) and (self.min_lumi == 0 or self.min_lumi <= lumi) or
            (self.min_run != 0 and self.min_run < run)
        )
        # upper bound: mirror of the above; the fallback must look at max_run,
        # otherwise an open lower bound rejects every run below the maximum
        below_max = (
            (self.max_run == 0 or self.max_run == run) and (self.max_lumi == 0 or self.max_lumi >= lumi) or
            (self.max_run != 0 and self.max_run > run)
        )
        return above_min and below_max


# default values
events_per_file = 50
events_per_lumi = 11650
events_per_file = 100
events_per_lumi = 11655
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for my education: how was 11655 chosen?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's how many events we can expect an HLT node to process in each lumisection under nominal conditions: 100 kHz / 200 nodes × 23.31 s = 11655 events

output_directory = ''

parser = argparse.ArgumentParser(description='Convert RAW data from .root format to .raw format.', formatter_class = argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('files', type=str, metavar='FILES', nargs='+', help='input files in .root format')
parser.add_argument('-o', '--output', type=str, dest='output_directory', metavar='PATH', default='', help='base path to store the output files; subdirectories based on the run number are automatically created')
parser.add_argument('-f', '--events_per_file', type=int, dest='events_per_file', metavar='EVENTS', default=events_per_file, help='split the output into files with at most EVENTS events')
parser.add_argument('-l', '--events_per_lumi', type=int, dest='events_per_lumi', metavar='EVENTS', default=events_per_lumi, help='process at most EVENTS events in each lumisection')
parser.add_argument('-r', '--range', type=LuminosityBlockRange, dest='range', metavar='[RUN:LUMI-RUN:LUMI]', default='all', help='process only the runs and lumisections in the given range')
parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', default=False, help='print additional information while processing the input files')
parser.add_argument('--one-file-per-lumi', action='store_true', dest='one_file_per_lumi', default=False, help='assume that lumisections are not split across files (and disable --events_per_lumi)')

# parse the command line arguments and options
Expand All @@ -59,16 +106,24 @@ content = {}
for f in files:

# run edmFileUtil --eventsInLumis ...
output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], capture_output=True, text=True)
print(f'preprocessing input file {f}')
if args.verbose:
output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], stdout=None, stderr=None)
else:
output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], capture_output=True, text=True)

# handle error conditions
if output.returncode < 0:
sys.stderr.write('error: edmFileUtil was killed by signal %d\n' % -output.returncode)
sys.stderr.write('\n')
sys.stderr.write(output.stderr)
if not args.verbose:
sys.stderr.write('\n')
sys.stderr.write(output.stderr)
sys.exit(output.returncode)
elif output.returncode > 0:
sys.stderr.write('error: edmFileUtil exited with error code %d\n' % output.returncode)
sys.stderr.write('\n')
sys.stderr.write(output.stderr)
if not args.verbose:
sys.stderr.write('\n')
sys.stderr.write(output.stderr)
sys.exit(output.returncode)

# parse the output of edmFileUtil
Expand All @@ -86,14 +141,23 @@ for f in files:

if parsing:
run, lumi, events = tuple(map(int, line.split()))
if not args.range.is_in_range(run, lumi):
print(f' run {run}, lumisetion {lumi} is outside of the given range and will be skipped')
continue
if events == 0:
print(f' run {run}, lumisetion {lumi} is empty and will be skipped')
continue
print(f' run {run}, lumisetion {lumi} with {events} events will be processed')
if not run in content:
content[run] = {}
if not lumi in content[run]:
content[run][lumi] = FileInfo()
content[run][lumi].events += events
content[run][lumi].files.add(f)
print()

# drop empty lumisections
# note: this may no longer be needed, but is left as a cross check
for run in content:
empty_lumis = [ lumi for lumi in content[run] if content[run][lumi].events == 0 ]
for lumi in empty_lumis:
Expand All @@ -117,6 +181,7 @@ if not os.path.exists(config_py):
sys.exit(1)

# convert the input data to FED RAW data format
converted_files = []

# process each run
for run in sorted(content):
Expand All @@ -130,7 +195,8 @@ for run in sorted(content):
# process the whole run
lumis = sorted(content[run])
print('found run %d, lumis %d-%d, with %d events' % (run, min(lumis), max(lumis), sum(content[run][lumi].events for lumi in lumis)))
cmsRun(config_py, inputFiles = ','.join(files), runNumber = run, eventsPerFile = args.events_per_file, outputPath = args.output_directory)
cmsRun(config_py, args.verbose, inputFiles = ','.join(files), runNumber = run, eventsPerFile = args.events_per_file, outputPath = args.output_directory)
converted_files = glob.glob(run_path + f'/run{run:06d}_ls{lumi:04d}_*.raw')

else:
# process lumisections individually, then merge the output
Expand All @@ -147,7 +213,7 @@ for run in sorted(content):
lumi_path = args.output_directory + f'/run{run:06d}_ls{lumi:04d}'
shutil.rmtree(lumi_path, ignore_errors=True)
os.makedirs(lumi_path)
cmsRun(config_py, inputFiles = ','.join(content[run][lumi].files), runNumber = run, lumiNumber = lumi, eventsPerLumi = args.events_per_lumi, eventsPerFile = args.events_per_file, outputPath = lumi_path)
cmsRun(config_py, args.verbose, inputFiles = ','.join(content[run][lumi].files), runNumber = run, lumiNumber = lumi, eventsPerLumi = args.events_per_lumi, eventsPerFile = args.events_per_file, outputPath = lumi_path)

# merge all lumisections data

Expand All @@ -174,7 +240,10 @@ for run in sorted(content):
# lumisection data and EoLS files
lumi_files = glob.glob(lumi_path + f'/run{run:06d}_ls{lumi:04d}_*')
for f in lumi_files:
shutil.move(f, run_path + '/')
target = run_path + f.removeprefix(lumi_path)
shutil.move(f, target)
if f.endswith('.raw'):
converted_files.append(target)

# read the partial EoR file
eor_file = lumi_path + f'/run{run:06d}_ls0000_EoR.jsn'
Expand All @@ -199,7 +268,42 @@ for run in sorted(content):

# write the final EoR file
# implemented by hand instead of using json.dump() to match the style used by the DAQ tools
assert len(converted_files) == summary['data'][1]
eor_file = run_path + f'/run{run:06d}_ls0000_EoR.jsn'
f = open(eor_file, 'w')
f.write('{\n "data" : [ "%d", "%d", "%d", "%d" ],\n "definition" : "%s",\n "source" : "%s"\n}\n' % (summary['data'][0], summary['data'][1], summary['data'][2], summary['data'][3], summary['definition'], summary['source']))
f.close()
with open(eor_file, 'w') as file:
file.write('{\n "data" : [ "%d", "%d", "%d", "%d" ],\n "definition" : "%s",\n "source" : "%s"\n}\n' % (summary['data'][0], summary['data'][1], summary['data'][2], summary['data'][3], summary['definition'], summary['source']))
file.close()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought with-open-as takes care of closing the file (so close wouldn't be needed).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does, but I don't mind doing it explicitly.


# mark the .raw files as not executable
for f in converted_files:
os.chmod(f, 0o644)

# write a cff file for processing the converted files
cff_file = args.output_directory + f'/run{run:06d}_cff.py'
with open(cff_file, 'w') as file:
file.write("""import FWCore.ParameterSet.Config as cms

from EventFilter.Utilities.FedRawDataInputSource_cfi import source as _source
source = _source.clone(
eventChunkSize = 200, # MB
eventChunkBlock = 200, # MB
numBuffers = 4,
maxBufferedFiles = 4,
fileListMode = True,
fileNames = (
%s
)
)

from EventFilter.Utilities.EvFDaqDirector_cfi import EvFDaqDirector as _EvFDaqDirector
EvFDaqDirector = _EvFDaqDirector.clone(
buBaseDir = '%s',
runNumber = %d
)

from EventFilter.Utilities.FastMonitoringService_cfi import FastMonitoringService as _FastMonitoringService
FastMonitoringService = _FastMonitoringService.clone()
""" % ('\n'.join(" '" + f + "'," for f in converted_files), args.output_directory, run))
file.close()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought with-open-as takes care of closing the file (so close wouldn't be needed).


# all done