Merge pull request #21095 from cms-nanoAOD/oct31st

NanoAOD update
cms-sw · Nov 14, 2017 · 499f659 · 499f659
2 parents 45cd82f + d960152
commit 499f659
Show file tree

Hide file tree

Showing 33 changed files with 1,545 additions and 371 deletions.
diff --git a/Configuration/PyReleaseValidation/python/relval_steps.py b/Configuration/PyReleaseValidation/python/relval_steps.py
@@ -478,7 +478,7 @@ def identitySim(wf):
 #input for a NANOAOD from MINIAOD workflow
 steps['ZEE_13_80XNanoAODINPUT']={'INPUT':InputInfo(dataSet='/RelValZEE_13/CMSSW_8_0_21-PU25ns_80X_mcRun2_asymptotic_2016_TrancheIV_v6_Tr4GT_v6-v1/MINIAODSIM',label='nanoaod80X',location='STD')}
 steps['TTbar_13_92XNanoAODINPUT']={'INPUT':InputInfo(dataSet='/RelValTTbar_13/CMSSW_9_2_12-PU25ns_92X_upgrade2017_realistic_v11-v1/MINIAODSIM',label='nanoaod92X',location='STD')}
-steps['TTbar_13_94XNanoAODINPUT']={'INPUT':InputInfo(dataSet='/RelValTTbar_13/CMSSW_9_4_0_pre1-PU25ns_93X_mc2017_realistic_v3-v1/MINIAODSIM',label='nanoaod94X',location='STD')}
+steps['TTbar_13_94XNanoAODINPUT']={'INPUT':InputInfo(dataSet='/RelValTTbar_13/CMSSW_9_4_0_pre3-PU25ns_94X_mc2017_realistic_v4-v1/MINIAODSIM',label='nanoaod94X',location='STD')}
 
 # 13 TeV recycle GEN-SIM input
 steps['MinBias_13INPUT']={'INPUT':InputInfo(dataSet='/RelValMinBias_13/%s/GEN-SIM'%(baseDataSetRelease[3],),location='STD')}
@@ -1992,12 +1992,12 @@ def gen2018HiMix(fragment,howMuch):
                                    '--datatier' : 'MINIAODSIM',
                                    '--eventcontent':'MINIAOD',},stepMiniAODMC])
 
+stepNanoAODDefaults = { '-s': 'NANO,DQM:@nanoAODDQM', '-n': 1000 }
+stepNanoAODData = merge([{ '--data':'', '--eventcontent' : 'NANOAOD,DQM' ,'--datatier': 'NANOAOD,DQMIO'    }, stepNanoAODDefaults ])
+stepNanoAODMC   = merge([{ '--mc':''  , '--eventcontent' : 'NANOAODSIM,DQM','--datatier': 'NANOAODSIM,DQMIO' }, stepNanoAODDefaults ])
+stepNanoEDMData = merge([{ '--data':'', '--eventcontent' : 'NANOAOD,DQM' ,'--datatier': 'NANOEDMAOD,DQMIO'     }, stepNanoAODDefaults ])
+stepNanoEDMMC   = merge([{ '--mc':''  , '--eventcontent' : 'NANOAODSIM,DQM','--datatier': 'NANOEDMAODSIM,DQMIO'    }, stepNanoAODDefaults ])
 
-stepNanoAODDefaults = { '-s': 'NANO', '-n': 1000 }
-stepNanoAODData = merge([{ '--data':'', '--eventcontent' : 'NANOAOD' ,'--datatier': 'NANOAOD'    }, stepNanoAODDefaults ])
-stepNanoAODMC   = merge([{ '--mc':''  , '--eventcontent' : 'NANOAODSIM','--datatier': 'NANOAODSIM' }, stepNanoAODDefaults ])
-stepNanoEDMData = merge([{ '--data':'', '--eventcontent' : 'NANOAOD' ,'--datatier': 'NANOEDMAOD'      }, stepNanoAODDefaults ])
-stepNanoEDMMC   = merge([{ '--mc':''  , '--eventcontent' : 'NANOAODSIM','--datatier': 'NANOEDMAODSIM' }, stepNanoAODDefaults ])
 
 steps['NANOAOD2016']   = merge([{'--conditions': 'auto:run2_data_relval', '--era': 'Run2_2016'}, stepNanoAODData ])
 steps['NANOAOD2017']   = merge([{'--conditions': 'auto:run2_data_relval', '--era': 'Run2_2017'}, stepNanoAODData ])

diff --git a/DQMOffline/Configuration/python/DQMOfflineMC_cff.py b/DQMOffline/Configuration/python/DQMOfflineMC_cff.py
@@ -28,3 +28,6 @@
     label = 'TrackerCollisionSelectedTrackMonMB' + str(tracks)
     locals()[label].doEffFromHitPatternVsBX = False
 
+from PhysicsTools.NanoAOD.nanoDQM_cff import nanoDQMMC
+DQMOfflineNanoAOD.replace(nanoDQM, nanoDQMMC)
+#PostDQMOfflineNanoAOD.replace(nanoDQM, nanoDQMMC)
diff --git a/DQMOffline/Configuration/python/DQMOffline_SecondStep_cff.py b/DQMOffline/Configuration/python/DQMOffline_SecondStep_cff.py
@@ -18,6 +18,7 @@
 from DQMOffline.Hcal.HcalDQMOfflinePostProcessor_cff import *
 from DQMOffline.L1Trigger.L1TriggerDqmOffline_cff import *
 from DQM.HcalTasks.OfflineHarvestingSequence_pp import *
+from PhysicsTools.NanoAOD.nanoDQM_cff import *
 
 DQMOffline_SecondStep_PreDPG = cms.Sequence( dqmDcsInfoClient *
                                              ecal_dqm_client_offline *
@@ -145,3 +146,5 @@
 DQMHarvestBTag = cms.Sequence( bTagCollectorSequenceDATA )
 
 DQMHarvestMiniAOD = cms.Sequence( dataCertificationJetMETSequence * muonQualityTests_miniAOD)
+DQMHarvestNanoAOD = cms.Sequence( nanoHarvest )
+
diff --git a/DQMOffline/Configuration/python/DQMOffline_cff.py b/DQMOffline/Configuration/python/DQMOffline_cff.py
@@ -181,3 +181,9 @@
 phase2_hcal.toReplaceWith( PostDQMOfflineMiniAOD, PostDQMOfflineMiniAOD.copyAndExclude([
     pfMetDQMAnalyzerMiniAOD, pfPuppiMetDQMAnalyzerMiniAOD # No hcalnoise yet
 ]))
+
+
+from PhysicsTools.NanoAOD.nanoDQM_cff import nanoDQM
+DQMOfflineNanoAOD = cms.Sequence(nanoDQM)
+#PostDQMOfflineNanoAOD = cms.Sequence(nanoDQM)
+
diff --git a/DQMOffline/Configuration/python/autoDQM.py b/DQMOffline/Configuration/python/autoDQM.py
@@ -47,6 +47,9 @@
             'miniAODDQM': ['DQMOfflineMiniAOD',
                            'PostDQMOfflineMiniAOD',
                            'DQMHarvestMiniAOD'],
+            'nanoAODDQM': ['DQMOfflineNanoAOD',
+                           'PostDQMOffline',
+                           'DQMHarvestNanoAOD'],
             'standardDQM': ['DQMOffline',
                             'PostDQMOffline',
                             'dqmHarvesting'],

diff --git a/DataFormats/NanoAOD/interface/FlatTable.h b/DataFormats/NanoAOD/interface/FlatTable.h
@@ -69,6 +69,22 @@ class FlatTable {
          return * beginData<T>(column);
     }
 
+    double getAnyValue(unsigned int row, unsigned int column) const ;
+
+    class RowView { // non-owning handle on one row of a FlatTable: stores a pointer to the table plus a row index
+        public:
+            RowView() {} // detached view (table_ == nullptr, row_ == 0); do not call the accessors below on it
+            RowView(const FlatTable & table, unsigned int row) : table_(&table), row_(row) {}
+            double getAnyValue(unsigned int column) const { return table_->getAnyValue(row_, column); }
+            double getAnyValue(const std::string & column) const { return table_->getAnyValue(row_, table_->columnIndex(column)); } // unknown name: columnIndex gives -1, which converts to a huge unsigned index that FlatTable::getAnyValue rejects as "Invalid column"
+            const FlatTable & table() const { return *table_; }
+            unsigned int row() const { return row_; }
+        private:
+            const FlatTable * table_ = nullptr; // default member initializers: a default-constructed view holds well-defined values instead of indeterminate ones
+            unsigned int row_ = 0;
+    };
+    RowView row(unsigned int row) const { return RowView(*this, row); }
+
     template<typename T, typename C = std::vector<T>>
     void addColumn(const std::string & name, const C & values, const std::string & docString, ColumnType type = defaultColumnType<T>(),int mantissaBits=-1) {
         if (columnIndex(name) != -1) throw cms::Exception("LogicError", "Duplicated column: "+name); 
@@ -94,7 +110,9 @@ class FlatTable {
             vec.push_back( value );
         }
     }
-
+
+    void addExtension(const FlatTable & extension) ;
+
     template<typename T> static ColumnType defaultColumnType() { throw cms::Exception("unsupported type"); }
 
     // this below needs to be public for ROOT, but it is to be considered private otherwise

diff --git a/DataFormats/NanoAOD/src/FlatTable.cc b/DataFormats/NanoAOD/src/FlatTable.cc
@@ -6,3 +6,32 @@ int nanoaod::FlatTable::columnIndex(const std::string & name) const {
     }
     return -1;
 }
+
+void nanoaod::FlatTable::addExtension(const nanoaod::FlatTable & other) { // copy every column of the extension table `other` into this table; throws cms::Exception("LogicError") on any mismatch
+    if (extension() || !other.extension() || name() != other.name() || size() != other.size()) throw cms::Exception("LogicError", "Mismatch in adding extension"); // this must be a non-extension table, `other` an extension with the same name and size
+    for (unsigned int i = 0, n = other.nColumns(); i < n; ++i) {
+        switch(other.columnType(i)) {
+            case FloatColumn:
+                addColumn<float>(other.columnName(i), other.columnData<float>(i), other.columnDoc(i), other.columnType(i));
+                break;
+            case IntColumn:
+                addColumn<int>(other.columnName(i), other.columnData<int>(i), other.columnDoc(i), other.columnType(i));
+                break;
+            case BoolColumn: case UInt8Column: // Bool columns are copied through uint8_t storage; the original column type is passed on unchanged
+                addColumn<uint8_t>(other.columnName(i), other.columnData<uint8_t>(i), other.columnDoc(i), other.columnType(i));
+                break;
+            default: throw cms::Exception("LogicError", "Unsupported type"); // same failure as getAnyValue: never drop a column silently
+        }
+    }
+}
+
+double nanoaod::FlatTable::getAnyValue(unsigned int row, unsigned int column) const { // return the cell at (row, column) converted to double, whatever the column's storage type; throws cms::Exception("LogicError") on bad indices or an unhandled type
+    if (column >= nColumns()) throw cms::Exception("LogicError","Invalid column");
+    if (row >= size()) throw cms::Exception("LogicError","Invalid row"); // guard the pointer arithmetic below; assumes size() is the number of rows per column - TODO confirm against FlatTable.h
+    switch(columnType(column)) {
+        case FloatColumn:  return *(beginData<float>(column)+row);
+        case IntColumn:  return *(beginData<int>(column)+row);
+        case BoolColumn: case UInt8Column:  return *(beginData<uint8_t>(column)+row); // Bool columns are read through the same uint8_t storage as UInt8 columns
+    }
+    throw cms::Exception("LogicError", "Unsupported type"); // reached only when the column type matches none of the cases above
+}
diff --git a/DataFormats/NanoAOD/src/classes.h b/DataFormats/NanoAOD/src/classes.h
@@ -9,6 +9,7 @@ namespace DataFormats_NanoAOD {
     struct dictionary {
         nanoaod::FlatTable table;
         edm::Wrapper<nanoaod::FlatTable> w_table;
+        nanoaod::FlatTable::RowView table_cursor;
         edm::Wrapper<nanoaod::MergeableCounterTable> w_mtable;
         edm::Wrapper<nanoaod::UniqueString> w_ustr;
     };

diff --git a/DataFormats/NanoAOD/src/classes_def.xml b/DataFormats/NanoAOD/src/classes_def.xml
@@ -6,6 +6,7 @@
     <class name="nanoaod::FlatTable" ClassVersion="3">
         <version ClassVersion="3" checksum="2443023556"/>
     </class>
+    <class name="nanoaod::FlatTable::RowView" transient="true" />
     <class name="edm::Wrapper<nanoaod::FlatTable>" />
 
     <class name="nanoaod::MergeableCounterTable::FloatColumn" ClassVersion="3">

diff --git a/PhysicsTools/NanoAOD/plugins/BuildFile.xml b/PhysicsTools/NanoAOD/plugins/BuildFile.xml
@@ -13,7 +13,7 @@
 <use   name="RecoVertex/VertexPrimitives"/>
 <use   name="DataFormats/L1TGlobal"/>
 <use   name="IOPool/Provenance"/>
-
+<use   name="DQMServices/Core"/>
 <library   file="*.cc" name="PhysicsToolsNanoAODPlugins">
   <flags   EDM_PLUGIN="1"/>
 </library>
diff --git a/PhysicsTools/NanoAOD/plugins/GenWeightsTableProducer.cc b/PhysicsTools/NanoAOD/plugins/GenWeightsTableProducer.cc
@@ -227,10 +227,11 @@ class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<
                 std::vector<ScaleVarWeight> scaleVariationIDs;
                 std::vector<PDFSetWeights>  pdfSetWeightIDs;
 
-                std::regex weightgroup("<weightgroup\\s+combine=\"(.*)\"\\s+name=\"(.*)\"\\s*>");
+                std::regex weightgroup("<weightgroup\\s+combine=\"(.*)\"\\s+(?:name|type)=\"(.*)\"\\s*>");
                 std::regex endweightgroup("</weightgroup>");
-                std::regex scalew("<weight\\s+id=\"(\\d+)\">\\s*(muR=(\\S+)\\s+muF=(\\S+)(\\s+.*)?)</weight>");
+                std::regex scalew("<weight\\s+id=\"(\\d+)\">\\s*(mu[rR]=(\\S+)\\s+mu[Ff]=(\\S+)(\\s+.*)?)</weight>");
                 std::regex pdfw("<weight\\s+id=\"(\\d+)\">\\s*PDF set\\s*=\\s*(\\d+)\\s*</weight>");
+                std::regex pdfwOld("<weight\\s+id=\"(\\d+)\">\\s*Member \\s*(\\d+)\\s*</weight>");
                 std::smatch groups;
                 for (auto iter=lheInfo->headers_begin(), end = lheInfo->headers_end(); iter != end; ++iter) {
                     if (iter->tag() != "initrwgt") {
@@ -243,7 +244,7 @@ class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<
                         if (lheDebug) std::cout << lines[iLine];
                         if (std::regex_search(lines[iLine], groups, weightgroup)) {
                             if (lheDebug) std::cout << ">>> Looks like the beginning of a weight group for " << groups.str(2) << std::endl;
-                            if (groups.str(2) == "scale_variation") {
+                            if (groups.str(2) == "scale_variation" || groups.str(2) == "Central scale variation") {
                                 for ( ++iLine; iLine < nLines; ++iLine) {
                                     if (lheDebug) std::cout << "    " << lines[iLine];
                                     if (std::regex_search(lines[iLine], groups, scalew)) {
@@ -276,6 +277,26 @@ class GenWeightsTableProducer : public edm::global::EDProducer<edm::StreamCache<
                                         break;
                                     }
                                 }
+                            } else if (groups.str(2) == "NNPDF30_lo_as_0130.LHgrid") { // some old 80X samples have PDF names in the header instead of using "PDF_variation" (e.g. MLM LO samples)
+                                for ( ++iLine; iLine < nLines; ++iLine) {              // we explicitly catch this one, and set the LHA ID by hand
+                                    if (lheDebug) std::cout << "    " << lines[iLine];
+                                    if (std::regex_search(lines[iLine], groups, pdfwOld)) {
+                                        unsigned int lhaID = std::stoi(groups.str(2))+262000; // ids in LHE are 0 ... N, to be mapped to the LHAPDF ids 262000 ... 262000 + N
+                                                                                              // 262000 is NNPDF30_lo_as_0130, as per https://lhapdf.hepforge.org/pdfsets.html
+                                        if (lheDebug) std::cout << "    >>> PDF weight " << groups.str(1) << " for " << groups.str(2) << " = " << lhaID << std::endl;
+                                        if (lhaID == 262000) continue; // skip the central value weight as we have it already as nominal weight, only record the uncertainty weights
+                                        if (pdfSetWeightIDs.empty() || ! pdfSetWeightIDs.back().maybe_add(groups.str(1),lhaID)) {
+                                            pdfSetWeightIDs.emplace_back(groups.str(1),lhaID);
+                                        }
+                                    } else if (std::regex_search(lines[iLine], endweightgroup)) {
+                                        if (lheDebug) std::cout << ">>> Looks like the end of a weight group" << std::endl;
+                                        break;
+                                    } else if (std::regex_search(lines[iLine], weightgroup)) {
+                                        if (lheDebug) std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end of the group." << std::endl;
+                                        --iLine; // rewind by one, and go back to the outer loop
+                                        break;
+                                    }
+                                }
                             } else {
                                 for ( ++iLine; iLine < nLines; ++iLine) {
                                     if (lheDebug) std::cout << "    " << lines[iLine];