Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Script to create a HipPy input dataset file #15887

Merged
merged 4 commits into from Oct 4, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 17 additions & 0 deletions Alignment/HIPAlignmentAlgorithm/scripts/createfilelist.py
@@ -0,0 +1,17 @@
#!/usr/bin/env python
"""Create a HipPy input dataset file from a DAS dataset name.

The output file contains one line per alignment job, each line being a
comma-separated list of quoted input file names (written by
Dataset.createdatasetfile_hippy).
"""
from Alignment.OfflineValidation.TkAlAllInOneTool.dataset import Dataset
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument("outputfilename", help="Goes into $CMSSW_BASE/src/Alignment/HIPAlignmentAlgorithm/data unless an absolute path starting with / is provided. example: ALCARECOTkAlMinBias.dat_example")
parser.add_argument("datasetname", help="example: /ZeroBias/Run2016G-TkAlMinBias-PromptReco-v1/ALCARECO")
parser.add_argument("filesperjob", type=int, help="max number of files in each job")
# nargs="?" makes the run limits optional; they default to None, meaning
# "no run selection" downstream.
parser.add_argument("firstrun", type=int, nargs="?", help="first run to use")
parser.add_argument("lastrun", type=int, nargs="?", help="last run to use")
args = parser.parse_args()

# Query the dataset directly instead of using a predefined _cff.py file,
# so that run-based file selection is possible.
dataset = Dataset(args.datasetname, tryPredefinedFirst=False)
# os.path.join discards the earlier components when args.outputfilename is
# absolute, which implements exactly what the help text promises.
outputfilename = os.path.join(os.environ["CMSSW_BASE"], "src", "Alignment", "HIPAlignmentAlgorithm", "data", args.outputfilename)
dataset.createdatasetfile_hippy(outputfilename, args.filesperjob, args.firstrun, args.lastrun)
149 changes: 105 additions & 44 deletions Alignment/OfflineValidation/python/TkAlAllInOneTool/dataset.py
Expand Up @@ -112,42 +112,9 @@ def __chunks( self, theList, n ):
"input = cms.untracked.int32(%(nEvents)s) )\n"
"%(skipEventsString)s\n")

def __createSnippet( self, jsonPath = None, begin = None, end = None,
firstRun = None, lastRun = None, repMap = None,
crab = False, parent = False ):
if firstRun:
firstRun = int( firstRun )
if lastRun:
lastRun = int( lastRun )
if ( begin and firstRun ) or ( end and lastRun ):
msg = ( "The Usage of "
+ "'begin' & 'firstRun' " * int( bool( begin and
firstRun ) )
+ "and " * int( bool( ( begin and firstRun ) and
( end and lastRun ) ) )
+ "'end' & 'lastRun' " * int( bool( end and lastRun ) )
+ "is ambigous." )
raise AllInOneError( msg )
if begin or end:
( firstRun, lastRun ) = self.convertTimeToRun(
begin = begin, end = end, firstRun = firstRun,
lastRun = lastRun )
if ( firstRun and lastRun ) and ( firstRun > lastRun ):
msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
"chosen is greater than the upper time/runrange limit "
"('end'/'lastRun').")
raise AllInOneError( msg )
if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun'"
"only work for official datasets, not predefined _cff.py files" )
raise AllInOneError( msg )
goodLumiSecStr = ""
lumiStr = ""
def __lumiSelectionSnippet( self, jsonPath = None, firstRun = None, lastRun = None ):
lumiSecExtend = ""
if firstRun or lastRun or jsonPath:
goodLumiSecStr = ( "lumiSecs = cms.untracked."
"VLuminosityBlockRange()\n" )
lumiStr = " lumisToProcess = lumiSecs,\n"
if not jsonPath:
selectedRunList = self.__getRunList()
if firstRun:
Expand Down Expand Up @@ -224,39 +191,78 @@ def __createSnippet( self, jsonPath = None, begin = None, end = None,
else:
msg = "You are trying to run a validation without any runs! Check that:"
if firstRun or lastRun:
msg += "\n - firstRun and lastRun are correct for this dataset, and there are runs in between containing data"
msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
if jsonPath:
msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
if (firstRun or lastRun) and jsonPath:
msg += "\n - firstRun and lastRun are consistent with your JSON file"
if begin:
msg = msg.replace("firstRun", "begin")
if end:
msg = msg.replace("lastRun", "end")
msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
raise AllInOneError(msg)

else:
runlist = self.__getRunList()
self.__firstusedrun = int(self.__findInJson(self.__getRunList()[0],"run_number"))
self.__lastusedrun = int(self.__findInJson(self.__getRunList()[-1],"run_number"))

return lumiSecExtend

def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
    """Return the python snippet that fills readFiles (and, for parent
    datasets, secFiles) for a dataset _cff.py.

    Files are emitted in chunks of at most 255 per extend() call because a
    cms.untracked.vstring cannot take arbitrarily long argument lists.
    For crab jobs the file list is handled by crab itself, so an empty
    string is returned.
    """
    if crab:
        files = ""
    else:
        # NOTE(review): the scraped diff carried a stale duplicate of this
        # assignment (the pre-change line without run selection), which
        # queried fileList() twice; only the run-aware call is kept.
        splitFileList = list( self.__chunks( self.fileList(firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
        fileStr = [ "',\n'".join( files ) for files in splitFileList ]
        fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
                    for files in fileStr ]
        files = "\n".join( fileStr )

        if parent:
            splitParentFileList = list( self.__chunks( self.fileList(parent=True, firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
            parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
            parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
                              for parentFiles in parentFileStr ]
            parentFiles = "\n".join( parentFileStr )
            files += "\n\n" + parentFiles

    return files

def __createSnippet( self, jsonPath = None, begin = None, end = None,
firstRun = None, lastRun = None, repMap = None,
crab = False, parent = False ):

if firstRun:
firstRun = int( firstRun )
if lastRun:
lastRun = int( lastRun )
if ( begin and firstRun ) or ( end and lastRun ):
msg = ( "The Usage of "
+ "'begin' & 'firstRun' " * int( bool( begin and
firstRun ) )
+ "and " * int( bool( ( begin and firstRun ) and
( end and lastRun ) ) )
+ "'end' & 'lastRun' " * int( bool( end and lastRun ) )
+ "is ambigous." )
raise AllInOneError( msg )
if begin or end:
( firstRun, lastRun ) = self.convertTimeToRun(
begin = begin, end = end, firstRun = firstRun,
lastRun = lastRun )
if ( firstRun and lastRun ) and ( firstRun > lastRun ):
msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
"chosen is greater than the upper time/runrange limit "
"('end'/'lastRun').")
raise AllInOneError( msg )
if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun'"
"only work for official datasets, not predefined _cff.py files" )
raise AllInOneError( msg )

lumiSecExtend = self.__lumiSelectionSnippet(jsonPath=jsonPath, firstRun=firstRun, lastRun=lastRun)
lumiStr = goodLumiSecStr = ""
if lumiSecExtend:
goodLumiSecStr = "lumiSecs = cms.untracked.VLuminosityBlockRange()\n"
lumiStr = " lumisToProcess = lumiSecs,\n"

files = self.__fileListSnippet(crab=crab, parent=parent, firstRun=firstRun, lastRun=lastRun, forcerunselection=False)

theMap = repMap
theMap["files"] = files
Expand Down Expand Up @@ -820,15 +826,70 @@ def dump_cff( self, outName = None, jsonPath = None, begin = None,
theFile.close()
return

def fileList( self, parent = False ):
def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun):
    """Write a HipPy input dataset file.

    Each line of the output corresponds to one job and holds at most
    `filesperjob` single-quoted file names joined by commas.  The file
    list is restricted to runs in [firstrun, lastrun]; run selection is
    forced, so files whose run number cannot be determined raise.
    """
    selectedfiles = self.fileList(firstRun=firstrun, lastRun=lastrun, forcerunselection=True)
    with open(filename, "w") as outfile:
        for jobfiles in self.__chunks(selectedfiles, filesperjob):
            line = ",".join("'{}'".format(name) for name in jobfiles)
            outfile.write(line + "\n")

@staticmethod
def getrunnumberfromfilename(filename):
parts = filename.split("/")
result = error = None
if parts[0] != "" or parts[1] != "store":
error = "does not start with /store"
elif parts[2] in ["mc", "relval"]:
result = 1
elif parts[-2] != "00000" or not parts[-1].endswith(".root"):
error = "does not end with 00000/something.root"
elif len(parts) != 12:
error = "should be exactly 11 slashes counting the first one"
else:
runnumberparts = parts[-5:-2]
if not all(len(part)==3 for part in runnumberparts):
error = "the 3 directories {} do not have length 3 each".format("/".join(runnumberparts))
try:
result = int("".join(runnumberparts))
except ValueError:
error = "the 3 directories {} do not form an integer".format("/".join(runnumberparts))

if error:
error = "could not figure out which run number this file is from:\n{}\n{}".format(filename, error)
raise AllInOneError(error)

return result

def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
if self.__fileList and not parent:
return self.__fileList
if self.__parentFileList and parent:
return self.__parentFileList

fileList = [ self.__findInJson(fileInfo,"name") \
fileList = [ self.__findInJson(fileInfo,"name")
for fileInfo in self.fileInfoList(parent) ]

if firstRun is not None or lastRun is not None:
if firstRun is None: firstRun = -1
if lastRun is None: lastRun = float('infinity')
unknownfilenames, reasons = [], set()
for filename in fileList[:]:
try:
if not firstRun < self.getrunnumberfromfilename(filename) < lastRun:
fileList.remove(filename)
except AllInOneError as e:
if forcerunselection: raise
unknownfilenames.append(e.message.split("\n")[1])
reasons .add (e.message.split("\n")[2])
if reasons:
if len(unknownfilenames) == len(fileList):
print "Could not figure out the run numbers of any of the filenames for the following reason(s):"
else:
print "Could not figure out the run numbers of the following filenames:"
for filename in unknownfilenames:
print " "+filename
print "for the following reason(s):"
for reason in reasons:
print " "+reason
print "Using the files anyway. The runs will be filtered at the CMSSW level."
if not parent:
self.__fileList = fileList
else:
Expand Down
Expand Up @@ -252,9 +252,6 @@ def createScript(self, path):
resultingFile = os.path.expandvars( resultingFile )
resultingFile = os.path.abspath( resultingFile )
resultingFile = "root://eoscms//eos/cms" + resultingFile #needs to be AFTER abspath so that it doesn't eat the //
repMap["runComparisonScripts"] += \
("xrdcp -f OUTPUT_comparison.root %s\n"
%resultingFile)
self.filesToCompare[ name ] = resultingFile

else:
Expand Down