diff --git a/IOPool/Input/src/RootEmbeddedFileSequence.cc b/IOPool/Input/src/RootEmbeddedFileSequence.cc index d2b08d26a0a07..d31b7589da78a 100644 --- a/IOPool/Input/src/RootEmbeddedFileSequence.cc +++ b/IOPool/Input/src/RootEmbeddedFileSequence.cc @@ -15,10 +15,18 @@ #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/Service.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" #include "CLHEP/Random/RandFlat.h" #include +#include +#include + +namespace { + std::atomic badFilesSkipped_{0}; + auto operator"" _uz(unsigned long long i) -> std::size_t { return std::size_t{i}; } // uz will be in C++23 +} // namespace namespace edm { class EventPrincipal; @@ -43,7 +51,8 @@ namespace edm { initialNumberOfEventsToSkip_(pset.getUntrackedParameter("skipEvents", 0U)), treeCacheSize_(pset.getUntrackedParameter("cacheSize", roottree::defaultCacheSize)), enablePrefetching_(false), - enforceGUIDInFileName_(pset.getUntrackedParameter("enforceGUIDInFileName", false)) { + enforceGUIDInFileName_(pset.getUntrackedParameter("enforceGUIDInFileName", false)), + maxFileSkips_(pset.getUntrackedParameter("maxFileSkips", std::min(3_uz, numberOfFiles()))) { if (noFiles()) { throw Exception(errors::NoSecondaryFiles) << "RootEmbeddedFileSequence no input files specified for secondary input source.\n"; @@ -92,17 +101,21 @@ namespace edm { unsigned int seed; f.read(reinterpret_cast(&seed), sizeof(seed)); std::default_random_engine dre(seed); - size_t count = numberOfFiles(); - std::uniform_int_distribution distribution(0, count - 1); - while (!rootFile() && count != 0) { - --count; + std::uniform_int_distribution distribution(0, numberOfFiles() - 1); + while (!rootFile() && badFilesSkipped_ < maxFileSkips_) { int offset = distribution(dre); setAtFileSequenceNumber(offset); initFile(input_.skipBadFiles()); + if (not rootFile()) { + ++badFilesSkipped_; + } } } if (rootFile()) { input_.productRegistryUpdate().updateFromInput(rootFile()->productRegistry()->productList()); + } else { + throw Exception(errors::FileOpenError) << "RootEmbeddedFileSequence::RootEmbeddedFileSequence(): " + << " input file retries exhausted.\n"; } } @@ -229,7 +242,7 @@ namespace edm { if (!found) { throw Exception(errors::NotFound) << "RootEmbeddedFileSequence::readOneSpecified(): Secondary Input files" << " do not contain specified event:\n" - << id << "\n"; + << id << " in file id " << idx.fileNameHash() << "\n"; } assert(rootFile()); found = rootFile()->readCurrentEvent(cache); @@ -246,16 +259,36 @@ namespace edm { assert(engine); unsigned int currentSeqNumber = sequenceNumberOfFile(); while (eventsRemainingInFile_ == 0) { - unsigned int newSeqNumber = CLHEP::RandFlat::shootInt(engine, fileCatalogItems().size()); - setAtFileSequenceNumber(newSeqNumber); - if (newSeqNumber != currentSeqNumber) { - initFile(false); - currentSeqNumber = newSeqNumber; + bool opened{false}; + while (!opened && badFilesSkipped_ < maxFileSkips_) { + unsigned int newSeqNumber = CLHEP::RandFlat::shootInt(engine, fileCatalogItems().size()); + setAtFileSequenceNumber(newSeqNumber); + if (newSeqNumber != currentSeqNumber) { + initFile(input_.skipBadFiles()); + currentSeqNumber = newSeqNumber; + } + if (rootFile()) { + eventsRemainingInFile_ = rootFile()->eventTree().entries(); + if (eventsRemainingInFile_ == 0) { + if (!input_.skipBadFiles()) { + throw Exception(errors::NotFound) << "RootEmbeddedFileSequence::readOneRandom(): Secondary Input file " + << fileNames()[0] << " contains no events.\n"; + } + LogWarning("RootEmbeddedFileSequence") << "RootEmbeddedFileSequence::readOneRandom(): Secondary Input file " + << fileNames()[0] << " contains no events and will be skipped.\n"; + ++badFilesSkipped_; + } else { + opened = true; + } + } else { + if (newSeqNumber != currentSeqNumber) { + ++badFilesSkipped_; + } + } } - eventsRemainingInFile_ = rootFile()->eventTree().entries(); - if (eventsRemainingInFile_ == 0) { - throw Exception(errors::NotFound) << "RootEmbeddedFileSequence::readOneRandom(): Secondary Input file " - << fileNames()[0] << " contains no events.\n"; + if (not opened) { + throw Exception(errors::FileOpenError) << "RootEmbeddedFileSequence::readOneRandom(): " + << " input file retries exhausted.\n"; } rootFile()->setAtEventEntry(CLHEP::RandFlat::shootInt(engine, eventsRemainingInFile_) - 1); } @@ -336,6 +369,10 @@ namespace edm { desc.addUntracked("skipEvents", 0U) ->setComment( "Skip the first 'skipEvents' events. Used only if 'sequential' is True and 'sameLumiBlock' is False"); + desc.addUntracked("maxFileSkips") + ->setComment( + "How many files to try if 'sequential' is False and 'skipBadFiles' is True.\n" + "Defaults to 3 (or # of files if smaller)."); desc.addUntracked("cacheSize", roottree::defaultCacheSize) ->setComment("Size of ROOT TTree prefetch cache. Affects performance."); desc.addUntracked("enforceGUIDInFileName", false) diff --git a/IOPool/Input/src/RootEmbeddedFileSequence.h b/IOPool/Input/src/RootEmbeddedFileSequence.h index 2480ef8e19d43..39bb3bb6e8c1b 100644 --- a/IOPool/Input/src/RootEmbeddedFileSequence.h +++ b/IOPool/Input/src/RootEmbeddedFileSequence.h @@ -71,6 +71,7 @@ namespace edm { unsigned int treeCacheSize_; bool enablePrefetching_; bool enforceGUIDInFileName_; + unsigned int maxFileSkips_; }; // class RootEmbeddedFileSequence } // namespace edm #endif diff --git a/IOPool/Input/src/RootInputFileSequence.cc b/IOPool/Input/src/RootInputFileSequence.cc index 1b1c24042345d..cdde0c54cd4d2 100644 --- a/IOPool/Input/src/RootInputFileSequence.cc +++ b/IOPool/Input/src/RootInputFileSequence.cc @@ -253,10 +253,10 @@ namespace edm { if (filePtr) { size_t currentIndexIntoFile = fileIter_ - fileIterBegin_; rootFile_ = makeRootFile(filePtr); + assert(rootFile_); if (input) { rootFile_->setSignals(&(input->preEventReadFromSourceSignal_), &(input->postEventReadFromSourceSignal_)); } - assert(rootFile_); fileIterLastOpened_ = fileIter_; setIndexIntoFile(currentIndexIntoFile); rootFile_->reportOpened(inputTypeName); diff --git a/IOPool/Input/src/RootInputFileSequence.h b/IOPool/Input/src/RootInputFileSequence.h index 905f2af1a41de..ded82ffec8964 100644 --- a/IOPool/Input/src/RootInputFileSequence.h +++ b/IOPool/Input/src/RootInputFileSequence.h @@ -3,7 +3,10 @@ /*---------------------------------------------------------------------- -RootInputFileSequence: This is an InputSource. initTheFile tries to open a file using a list of PFN names constructed from multiple data catalogs in site-local-config.xml. These are accessed via FileCatalogItem iterator fileIter_. +RootInputFileSequence: This is an InputSource. initTheFile tries to open +a file using a list of PFN names constructed from multiple data catalogs +in site-local-config.xml. These are accessed via FileCatalogItem iterator +fileIter_. ----------------------------------------------------------------------*/ diff --git a/IOPool/SecondaryInput/test/SecondaryInputTestSkip_cfg.py b/IOPool/SecondaryInput/test/SecondaryInputTestSkip_cfg.py new file mode 100644 index 0000000000000..e2beb6bf63e08 --- /dev/null +++ b/IOPool/SecondaryInput/test/SecondaryInputTestSkip_cfg.py @@ -0,0 +1,42 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process("PROD") +process.load("FWCore.Framework.test.cmsExceptionsFatal_cff") + +process.maxEvents = cms.untracked.PSet( + input = cms.untracked.int32(75) +) +process.RandomNumberGeneratorService = cms.Service("RandomNumberGeneratorService", + Thing = cms.PSet( + initialSeed = cms.untracked.uint32(12345) + ) +) + +process.source = cms.Source("PoolSource", + skipBadFiles = cms.untracked.bool(True), + fileNames = cms.untracked.vstring( + 'file:SecondaryInputTest.root', + 'file:SecondaryInputTest.root', + 'file:SecondaryInputTest.root' + ) +) + +process.Thing = cms.EDProducer("SecondaryProducer", + input = cms.SecSource("EmbeddedRootSource", + skipBadFiles = cms.untracked.bool(True), + maxFileSkips = cms.untracked.uint32(100), + fileNames = cms.untracked.vstring( + 'file:SecondaryInputTest2.root', + 'file:missing.root', + 'file:SecondaryInputTest2.root' + ) + ) +) + +process.Analysis = cms.EDAnalyzer("EventContentAnalyzer", + verbose = cms.untracked.bool(False) +) + +process.p = cms.Path(process.Thing*process.Analysis) + +