/
convertToRaw
executable file
·205 lines (170 loc) · 8.5 KB
/
convertToRaw
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#! /usr/bin/env python3
import argparse
import glob
import json
import os
import re
import shutil
import socket
import subprocess
import sys
def cmsRun(config, **args):
    """Run ``cmsRun`` on `config`, forwarding `args` as ``name=value`` options.

    The full command line is echoed to standard output before the job is
    started. The child process inherits the standard output and standard
    error of this script.

    Args:
        config: path of the configuration file passed to ``cmsRun``.
        **args: extra command line options; each one is appended to the
            command as ``name=str(value)``.

    Returns:
        None when ``cmsRun`` terminates with exit code 0.

    Exits:
        Calls ``sys.exit`` with the return code of the child process when
        ``cmsRun`` is killed by a signal (negative return code) or exits with
        a non-zero error code, after writing a message to standard error.
    """
    cmd = ['cmsRun', config] + [arg + '=' + str(val) for (arg, val) in args.items()]
    sys.stdout.write(' \\\n '.join(cmd))
    sys.stdout.write('\n\n')
    # flush the echoed command so that it appears before the output of the child process
    sys.stdout.flush()
    status = subprocess.run(cmd, stdout=None, stderr=None)

    # handle error conditions
    # note: status.check_returncode() is deliberately not called here, because it would
    # raise CalledProcessError and make the error reporting below unreachable
    if status.returncode < 0:
        sys.stderr.write('error: cmsRun was killed by signal %d\n' % -status.returncode)
        sys.exit(status.returncode)
    elif status.returncode > 0:
        sys.stderr.write('error: cmsRun exited with error code %d\n' % status.returncode)
        sys.exit(status.returncode)
# default values
events_per_file = 50
events_per_lumi = 11650
# use the current working directory as the default base path: the output paths are built
# as output_directory + '/run...', so an empty base path would point to the filesystem root
output_directory = os.getcwd()

parser = argparse.ArgumentParser(description='Convert RAW data from .root format to .raw format.', formatter_class = argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('files', type=str, metavar='FILES', nargs='+', help='input files in .root format')
parser.add_argument('-o', '--output', type=str, dest='output_directory', metavar='PATH', default=output_directory, help='base path to store the output files; subdirectories based on the run number are automatically created')
parser.add_argument('-f', '--events_per_file', type=int, dest='events_per_file', metavar='EVENTS', default=events_per_file, help='split the output into files with at most EVENTS events')
parser.add_argument('-l', '--events_per_lumi', type=int, dest='events_per_lumi', metavar='EVENTS', default=events_per_lumi, help='process at most EVENTS events in each lumisection')
parser.add_argument('--one-file-per-lumi', action='store_true', dest='one_file_per_lumi', default=False, help='assume that lumisections are not split across files (and disable --events_per_lumi)')

# parse the command line arguments and options
args = parser.parse_args()
# an explicitly empty base path is treated like the default one
if not args.output_directory:
    args.output_directory = output_directory
# strip one trailing '/', since the output paths are built by appending '/run...'
if args.output_directory.endswith('/'):
    args.output_directory = args.output_directory[:-1]

# read the list of input files from the command line arguments
# an argument is prefixed with 'file:' only if it does not contain a ':', does not start
# with '/store/', and exists on the local file system; otherwise it is used unchanged
files = [ 'file:' + f if (':' not in f and not f.startswith('/store/') and os.path.exists(f)) else f for f in args.files ]
# extract the list of runs and lumisections in the input files
class FileInfo:
    """Event count and set of input files recorded for one lumisection."""

    def __init__(self):
        # input files in which the lumisection was found
        self.files = set()
        # number of events, summed over those files
        self.events = 0
# the table printed by edmFileUtil starts after a "Run Lumi # Events" header line
# and ends at the first blank line
header = re.compile(r'^ +Run +Lumi +# Events$')
empty = re.compile(r'^ *$')

# content[run][lumi] is a FileInfo with the total number of events and the input files
content = {}

for f in files:
    # run edmFileUtil --eventsInLumis ...
    output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], capture_output=True, text=True)
    if output.returncode != 0:
        # a negative return code means that the process was killed by a signal
        if output.returncode < 0:
            sys.stderr.write('error: edmFileUtil was killed by signal %d\n' % -output.returncode)
        else:
            sys.stderr.write('error: edmFileUtil exited with error code %d\n' % output.returncode)
        sys.stderr.write('\n')
        sys.stderr.write(output.stderr)
        sys.exit(output.returncode)

    # parse the output of edmFileUtil
    parsing = False
    for line in output.stdout.splitlines():
        if not parsing:
            # skip everything until the header of the table is found
            if header.match(line):
                parsing = True
            continue
        if empty.match(line):
            # a blank line marks the end of the table
            parsing = False
            continue
        # each row of the table holds three integers: run, lumisection, number of events
        run, lumi, events = [int(field) for field in line.split()]
        info = content.setdefault(run, {}).setdefault(lumi, FileInfo())
        info.events += events
        info.files.add(f)

# drop empty lumisections
for run, lumis in content.items():
    content[run] = {lumi: info for lumi, info in lumis.items() if info.events != 0}

# drop empty runs
content = {run: lumis for run, lumis in content.items() if lumis}
# locate the CMSSW configuration file: look in the local area first, then in the release area
config_name = 'HLTrigger/Tools/python/convertToRaw.py'
current_area = os.environ['CMSSW_BASE']
release_area = os.environ['CMSSW_RELEASE_BASE']

for area in (current_area, release_area):
    config_py = area + '/src/' + config_name
    if os.path.exists(config_py):
        # use the first area where the file is found
        break
else:
    # the file was not found in any of the areas
    sys.stderr.write('error: cannot find the configuration file %s\n' % config_name)
    sys.exit(1)
# convert the input data to FED RAW data format

# process each run
for run in sorted(content):

    # create the output directory structure, removing any previous output for this run
    run_path = args.output_directory + f'/run{run:06d}'
    shutil.rmtree(run_path, ignore_errors=True)
    os.makedirs(run_path)

    if args.one_file_per_lumi:
        # process the whole run with a single job that reads all the input files
        lumis = sorted(content[run])
        print('found run %d, lumis %d-%d, with %d events' % (run, min(lumis), max(lumis), sum(content[run][lumi].events for lumi in lumis)))
        cmsRun(config_py, inputFiles = ','.join(files), runNumber = run, eventsPerFile = args.events_per_file, outputPath = args.output_directory)

    else:
        # process lumisections individually, then merge the output
        summary = {
            'data': [0, 0, 0, 0],   # [ 'events', 'files', 'lumisections', 'last lumisection' ]
            'definition': run_path + '/jsd/EoR.jsd',
            'source': socket.getfqdn() + '_' + str(os.getpid())
        }

        for lumi in sorted(content[run]):

            # process individual lumisections, each one in its own intermediate directory
            print('found run %d, lumi %d, with %d events' % (run, lumi, content[run][lumi].events))
            lumi_base_path = args.output_directory + f'/run{run:06d}_ls{lumi:04d}'
            shutil.rmtree(lumi_base_path, ignore_errors=True)
            os.makedirs(lumi_base_path)
            # the input files are stored in a set: sort them to pass them in a reproducible order
            cmsRun(config_py, inputFiles = ','.join(sorted(content[run][lumi].files)), runNumber = run, lumiNumber = lumi, eventsPerLumi = args.events_per_lumi, eventsPerFile = args.events_per_file, outputPath = lumi_base_path)

            # merge all lumisections data

            # number of events expected to be processed
            # (a negative --events_per_lumi does not limit the number of events)
            if args.events_per_lumi < 0:
                expected_events = content[run][lumi].events
            else:
                expected_events = min(args.events_per_lumi, content[run][lumi].events)

            # number of files expected to be created (integer division, rounded up)
            expected_files = (expected_events + args.events_per_file - 1) // args.events_per_file

            # find the files produced by the conversion job and move them to the per-run path
            lumi_path = lumi_base_path + f'/run{run:06d}'

            # jsd files: keep the copy from the first lumisection, discard the others
            jsd_path = lumi_path + '/jsd'
            if not os.path.exists(run_path + '/jsd'):
                shutil.move(jsd_path, run_path)
            else:
                shutil.rmtree(jsd_path)

            # lumisection data and EoLS files
            lumi_files = glob.glob(lumi_path + f'/run{run:06d}_ls{lumi:04d}_*')
            for f in lumi_files:
                shutil.move(f, run_path + '/')

            # read the partial EoR file
            eor_file = lumi_path + f'/run{run:06d}_ls0000_EoR.jsn'
            with open(eor_file) as f:
                eor = json.load(f)

            # check that the job produced what was expected; this is an explicit check
            # rather than an assert, so that it is not skipped when running with python -O
            produced = [int(value) for value in eor['data'][:4]]
            expected = [expected_events, expected_files, 1, lumi]
            if produced != expected:
                sys.stderr.write('error: unexpected [events, files, lumisections, last lumisection] in %s: found %s, expected %s\n' % (eor_file, produced, expected))
                sys.exit(1)

            # update the per-run summary
            summary['data'][0] += expected_events
            summary['data'][1] += expected_files
            summary['data'][2] += 1
            summary['data'][3] = lumi
            os.remove(eor_file)

            # remove the intermediate directory
            shutil.rmtree(lumi_base_path, ignore_errors=True)

        # write the final EoR file
        # implemented by hand instead of using json.dump() to match the style used by the DAQ tools
        eor_file = run_path + f'/run{run:06d}_ls0000_EoR.jsn'
        with open(eor_file, 'w') as f:
            f.write('{\n "data" : [ "%d", "%d", "%d", "%d" ],\n "definition" : "%s",\n "source" : "%s"\n}\n' % (summary['data'][0], summary['data'][1], summary['data'][2], summary['data'][3], summary['definition'], summary['source']))