Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Script to create a HipPy input dataset file #15887

Merged
merged 4 commits into from Oct 4, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 17 additions & 0 deletions Alignment/HIPAlignmentAlgorithm/scripts/createfilelist.py
@@ -0,0 +1,17 @@
#!/usr/bin/env python
"""Create a HipPy input dataset file from a DAS dataset name.

The output file contains one line per alignment job, each line being a
comma-separated list of quoted input file names (written by
Dataset.createdatasetfile_hippy).
"""
from Alignment.OfflineValidation.TkAlAllInOneTool.dataset import Dataset
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument("outputfilename", help="Goes into $CMSSW_BASE/src/Alignment/HIPAlignmentAlgorithm/data unless an absolute path starting with / is provided. example: ALCARECOTkAlMinBias.dat_example")
parser.add_argument("datasetname", help="example: /ZeroBias/Run2016G-TkAlMinBias-PromptReco-v1/ALCARECO")
parser.add_argument("filesperjob", type=int, help="max number of files in each job")
# nargs="?" makes the run limits optional; they default to None, meaning
# "no run selection" downstream.
parser.add_argument("firstrun", type=int, nargs="?", help="first run to use")
parser.add_argument("lastrun", type=int, nargs="?", help="last run to use")
args = parser.parse_args()

# Query the dataset directly instead of using a predefined _cff.py file,
# so that run-based file selection is possible.
dataset = Dataset(args.datasetname, tryPredefinedFirst=False)
# os.path.join discards the earlier components when args.outputfilename is
# absolute, which implements exactly what the help text promises.
outputfilename = os.path.join(os.environ["CMSSW_BASE"], "src", "Alignment", "HIPAlignmentAlgorithm", "data", args.outputfilename)
dataset.createdatasetfile_hippy(outputfilename, args.filesperjob, args.firstrun, args.lastrun)
149 changes: 105 additions & 44 deletions Alignment/OfflineValidation/python/TkAlAllInOneTool/dataset.py
Expand Up @@ -112,42 +112,9 @@ def __chunks( self, theList, n ):
"input = cms.untracked.int32(%(nEvents)s) )\n"
"%(skipEventsString)s\n")

def __createSnippet( self, jsonPath = None, begin = None, end = None,
firstRun = None, lastRun = None, repMap = None,
crab = False, parent = False ):
if firstRun:
firstRun = int( firstRun )
if lastRun:
lastRun = int( lastRun )
if ( begin and firstRun ) or ( end and lastRun ):
msg = ( "The Usage of "
+ "'begin' & 'firstRun' " * int( bool( begin and
firstRun ) )
+ "and " * int( bool( ( begin and firstRun ) and
( end and lastRun ) ) )
+ "'end' & 'lastRun' " * int( bool( end and lastRun ) )
+ "is ambigous." )
raise AllInOneError( msg )
if begin or end:
( firstRun, lastRun ) = self.convertTimeToRun(
begin = begin, end = end, firstRun = firstRun,
lastRun = lastRun )
if ( firstRun and lastRun ) and ( firstRun > lastRun ):
msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
"chosen is greater than the upper time/runrange limit "
"('end'/'lastRun').")
raise AllInOneError( msg )
if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun'"
"only work for official datasets, not predefined _cff.py files" )
raise AllInOneError( msg )
goodLumiSecStr = ""
lumiStr = ""
def __lumiSelectionSnippet( self, jsonPath = None, firstRun = None, lastRun = None ):
lumiSecExtend = ""
if firstRun or lastRun or jsonPath:
goodLumiSecStr = ( "lumiSecs = cms.untracked."
"VLuminosityBlockRange()\n" )
lumiStr = " lumisToProcess = lumiSecs,\n"
if not jsonPath:
selectedRunList = self.__getRunList()
if firstRun:
Expand Down Expand Up @@ -224,39 +191,78 @@ def __createSnippet( self, jsonPath = None, begin = None, end = None,
else:
msg = "You are trying to run a validation without any runs! Check that:"
if firstRun or lastRun:
msg += "\n - firstRun and lastRun are correct for this dataset, and there are runs in between containing data"
msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
if jsonPath:
msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
if (firstRun or lastRun) and jsonPath:
msg += "\n - firstRun and lastRun are consistent with your JSON file"
if begin:
msg = msg.replace("firstRun", "begin")
if end:
msg = msg.replace("lastRun", "end")
msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
raise AllInOneError(msg)

else:
runlist = self.__getRunList()
self.__firstusedrun = int(self.__findInJson(self.__getRunList()[0],"run_number"))
self.__lastusedrun = int(self.__findInJson(self.__getRunList()[-1],"run_number"))

return lumiSecExtend

def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
    """Return the python snippet that fills readFiles (and, for parent
    datasets, secFiles) for a dataset _cff.py.

    Files are emitted in chunks of at most 255 per extend() call because a
    cms.untracked.vstring cannot take arbitrarily long argument lists.
    For crab jobs the file list is handled by crab itself, so an empty
    string is returned.
    """
    if crab:
        files = ""
    else:
        # NOTE(review): the scraped diff carried a stale duplicate of this
        # assignment (the pre-change line without run selection), which
        # queried fileList() twice; only the run-aware call is kept.
        splitFileList = list( self.__chunks( self.fileList(firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
        fileStr = [ "',\n'".join( files ) for files in splitFileList ]
        fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
                    for files in fileStr ]
        files = "\n".join( fileStr )

        if parent:
            splitParentFileList = list( self.__chunks( self.fileList(parent=True, firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
            parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
            parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
                              for parentFiles in parentFileStr ]
            parentFiles = "\n".join( parentFileStr )
            files += "\n\n" + parentFiles

    return files

def __createSnippet( self, jsonPath = None, begin = None, end = None,
firstRun = None, lastRun = None, repMap = None,
crab = False, parent = False ):

if firstRun:
firstRun = int( firstRun )
if lastRun:
lastRun = int( lastRun )
if ( begin and firstRun ) or ( end and lastRun ):
msg = ( "The Usage of "
+ "'begin' & 'firstRun' " * int( bool( begin and
firstRun ) )
+ "and " * int( bool( ( begin and firstRun ) and
( end and lastRun ) ) )
+ "'end' & 'lastRun' " * int( bool( end and lastRun ) )
+ "is ambigous." )
raise AllInOneError( msg )
if begin or end:
( firstRun, lastRun ) = self.convertTimeToRun(
begin = begin, end = end, firstRun = firstRun,
lastRun = lastRun )
if ( firstRun and lastRun ) and ( firstRun > lastRun ):
msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
"chosen is greater than the upper time/runrange limit "
"('end'/'lastRun').")
raise AllInOneError( msg )
if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun'"
"only work for official datasets, not predefined _cff.py files" )
raise AllInOneError( msg )

lumiSecExtend = self.__lumiSelectionSnippet(jsonPath=jsonPath, firstRun=firstRun, lastRun=lastRun)
lumiStr = goodLumiSecStr = ""
if lumiSecExtend:
goodLumiSecStr = "lumiSecs = cms.untracked.VLuminosityBlockRange()\n"
lumiStr = " lumisToProcess = lumiSecs,\n"

files = self.__fileListSnippet(crab=crab, parent=parent, firstRun=firstRun, lastRun=lastRun, forcerunselection=False)

theMap = repMap
theMap["files"] = files
Expand Down Expand Up @@ -820,15 +826,70 @@ def dump_cff( self, outName = None, jsonPath = None, begin = None,
theFile.close()
return

def fileList( self, parent = False ):
def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun):
    """Write a HipPy input dataset file.

    Each line of the output corresponds to one job and holds at most
    `filesperjob` single-quoted file names joined by commas.  The file
    list is restricted to runs in [firstrun, lastrun]; run selection is
    forced, so files whose run number cannot be determined raise.
    """
    selectedfiles = self.fileList(firstRun=firstrun, lastRun=lastrun, forcerunselection=True)
    with open(filename, "w") as outfile:
        for jobfiles in self.__chunks(selectedfiles, filesperjob):
            line = ",".join("'{}'".format(name) for name in jobfiles)
            outfile.write(line + "\n")

@staticmethod
def getrunnumberfromfilename(filename):
parts = filename.split("/")
result = error = None
if parts[0] != "" or parts[1] != "store":
error = "does not start with /store"
elif parts[2] in ["mc", "relval"]:
result = 1
elif parts[-2] != "00000" or not parts[-1].endswith(".root"):
error = "does not end with 00000/something.root"
elif len(parts) != 12:
error = "should be exactly 11 slashes counting the first one"
else:
runnumberparts = parts[-5:-2]
if not all(len(part)==3 for part in runnumberparts):
error = "the 3 directories {} do not have length 3 each".format("/".join(runnumberparts))
try:
result = int("".join(runnumberparts))
except ValueError:
error = "the 3 directories {} do not form an integer".format("/".join(runnumberparts))

if error:
error = "could not figure out which run number this file is from:\n{}\n{}".format(filename, error)
raise AllInOneError(error)

return result

def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
if self.__fileList and not parent:
return self.__fileList
if self.__parentFileList and parent:
return self.__parentFileList

fileList = [ self.__findInJson(fileInfo,"name") \
fileList = [ self.__findInJson(fileInfo,"name")
for fileInfo in self.fileInfoList(parent) ]

if firstRun is not None or lastRun is not None:
if firstRun is None: firstRun = -1
if lastRun is None: lastRun = float('infinity')
unknownfilenames, reasons = [], set()
for filename in fileList[:]:
try:
if not firstRun < self.getrunnumberfromfilename(filename) < lastRun:
fileList.remove(filename)
except AllInOneError as e:
if forcerunselection: raise
unknownfilenames.append(e.message.split("\n")[1])
reasons .add (e.message.split("\n")[2])
if reasons:
if len(unknownfilenames) == len(fileList):
print "Could not figure out the run numbers of any of the filenames for the following reason(s):"
else:
print "Could not figure out the run numbers of the following filenames:"
for filename in unknownfilenames:
print " "+filename
print "for the following reason(s):"
for reason in reasons:
print " "+reason
print "Using the files anyway. The runs will be filtered at the CMSSW level."
if not parent:
self.__fileList = fileList
else:
Expand Down
Expand Up @@ -252,9 +252,6 @@ def createScript(self, path):
resultingFile = os.path.expandvars( resultingFile )
resultingFile = os.path.abspath( resultingFile )
resultingFile = "root://eoscms//eos/cms" + resultingFile #needs to be AFTER abspath so that it doesn't eat the //
repMap["runComparisonScripts"] += \
("xrdcp -f OUTPUT_comparison.root %s\n"
%resultingFile)
self.filesToCompare[ name ] = resultingFile

else:
Expand Down