Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add enforceGUIDInFileName option to PoolSource and EmbeddedRootSource (11_0_X) #28586

Merged
merged 8 commits into from Dec 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion FWCore/Framework/python/test/cmsExceptionsFatalOption_cff.py
Expand Up @@ -30,5 +30,6 @@
'ProductDoesNotSupportViews',
'ProductDoesNotSupportPtr',
'NotFound',
'FormatIncompatibility'
'FormatIncompatibility',
'FileNameInconsistentWithGUID',
)
36 changes: 36 additions & 0 deletions FWCore/MessageLogger/scripts/edmFjrDump
@@ -0,0 +1,36 @@
#!/usr/bin/env python

from __future__ import print_function
import xml.etree.ElementTree as ET
import argparse

def printGUID(root):
for f in root.findall("File"):
print(f.find("GUID").text)

def printExitCode(root):
error = root.find("FrameworkError")
if error is None:
print("0")
return
print(error.get("ExitStatus"))

def main(opts):
tree = ET.parse(opts.file)
root = tree.getroot()
if opts.guid:
printGUID(root)
if opts.exitCode:
printExitCode(root)

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Extract some values from the Framework Job Report")
parser.add_argument("file", type=str,
help="Framework Job Report XML file")
parser.add_argument("--guid", action="store_true",
help="GUID of the output file")
parser.add_argument("--exitCode", action="store_true",
help="Job exit code")

opts = parser.parse_args()
main(opts)
5 changes: 3 additions & 2 deletions FWCore/Utilities/interface/EDMException.h
Expand Up @@ -16,8 +16,7 @@ namespace edm {
namespace errors {

// If you add a new entry to the set of values, make sure to
// update the translation map in EDMException.cc, the actions
// table in FWCore/Framework/src/Actions.cc, and the configuration
// update the translation map in EDMException.cc, and the configuration
// fragment FWCore/Framework/python/test/cmsExceptionsFatalOption_cff.py.

enum ErrorCodes {
Expand Down Expand Up @@ -66,6 +65,8 @@ namespace edm {

FileWriteError = 8033,

FileNameInconsistentWithGUID = 8034,

EventGenerationFailure = 8501,

CaughtSignal = 9000
Expand Down
19 changes: 19 additions & 0 deletions FWCore/Utilities/interface/stemFromPath.h
@@ -0,0 +1,19 @@
#ifndef FWCore_Utilities_stemFromPath_h
#define FWCore_Utilities_stemFromPath_h

#include <string_view>

namespace edm {
// This functions extracts the stem of a file from the path (= file
// name without the extension). The returned value is a string_view
// to the input string. Caller should ensure that the input string
// object lives long enough.
//
// The reason to have our own function instead of
// std/boost::filesystem is that tehcnically these paths are not
// filesystem paths, but paths in CMS LFN/PFN space that (may) have
// different rules.
std::string_view stemFromPath(std::string_view path);
} // namespace edm

#endif
1 change: 1 addition & 0 deletions FWCore/Utilities/src/EDMException.cc
Expand Up @@ -42,6 +42,7 @@ namespace edm {
FILLENTRY(ExceededResourceRSS),
FILLENTRY(ExceededResourceTime),
FILLENTRY(FileWriteError),
FILLENTRY(FileNameInconsistentWithGUID),
FILLENTRY(EventGenerationFailure),
FILLENTRY(CaughtSignal)};
static const std::string kUnknownCode("unknownCode");
Expand Down
20 changes: 20 additions & 0 deletions FWCore/Utilities/src/stemFromPath.cc
@@ -0,0 +1,20 @@
#include "FWCore/Utilities/interface/stemFromPath.h"

namespace edm {
std::string_view stemFromPath(std::string_view path) {
auto begin = path.rfind("/");
if (begin == std::string_view::npos) {
begin = path.rfind(":");
if (begin == std::string_view::npos) {
// shouldn't really happen?
begin = 0;
} else {
begin += 1;
}
} else {
begin += 1;
}
auto end = path.find(".", begin);
return path.substr(begin, end - begin);
}
} // namespace edm
3 changes: 3 additions & 0 deletions FWCore/Utilities/test/BuildFile.xml
Expand Up @@ -34,6 +34,9 @@
<bin name="testFWCoreUtilities" file="typeidbase_t.cppunit.cpp,typeid_t.cppunit.cpp,cputimer_t.cppunit.cpp,esinputtag.cppunit.cpp,extensioncord_t.cppunit.cpp,friendlyname_t.cppunit.cpp,signal_t.cppunit.cpp,soatuple_t.cppunit.cpp,transform.cppunit.cpp,callxnowait_t.cppunit.cpp,vecarray.cppunit.cpp,reusableobjectholder_t.cppunit.cpp,propagate_const_t.cppunit.cpp,indexset.cppunit.cpp">
<use name="cppunit"/>
</bin>
<bin file="test_catch2_*.cc" name="testFWCoreUtilitiesCatch2">
<use name="catch2"/>
</bin>

<bin file="InputTag_t.cpp">
<use name="tbb"/>
Expand Down
2 changes: 2 additions & 0 deletions FWCore/Utilities/test/test_catch2_main.cc
@@ -0,0 +1,2 @@
#define CATCH_CONFIG_MAIN
#include "catch.hpp"
15 changes: 15 additions & 0 deletions FWCore/Utilities/test/test_catch2_stemFromPath.cc
@@ -0,0 +1,15 @@
#include "FWCore/Utilities/interface/stemFromPath.h"

#include "catch.hpp"

TEST_CASE("Test stemFromPath", "[sources]") {
CHECK(edm::stemFromPath("foo.root") == "foo");
CHECK(edm::stemFromPath("/foo.root") == "foo");
CHECK(edm::stemFromPath("/bar/foo.root") == "foo");
CHECK(edm::stemFromPath("/bar///....//...///foo.root") == "foo");
CHECK(edm::stemFromPath("/bar/foo.xyzzy") == "foo");
CHECK(edm::stemFromPath("/bar/xyzzy.foo.root") == "xyzzy");
CHECK(edm::stemFromPath("file:foo.root") == "foo");
CHECK(edm::stemFromPath("file:/path/to/bar.txt") == "bar");
CHECK(edm::stemFromPath("root://server.somewhere:port/whatever?param=path/to/bar.txt") == "bar");
}
10 changes: 8 additions & 2 deletions IOPool/Input/src/RootEmbeddedFileSequence.cc
Expand Up @@ -42,7 +42,8 @@ namespace edm {
// and should be deleted from the code.
initialNumberOfEventsToSkip_(pset.getUntrackedParameter<unsigned int>("skipEvents", 0U)),
treeCacheSize_(pset.getUntrackedParameter<unsigned int>("cacheSize", roottree::defaultCacheSize)),
enablePrefetching_(false) {
enablePrefetching_(false),
enforceGUIDInFileName_(pset.getUntrackedParameter<bool>("enforceGUIDInFileName", false)) {
if (noFiles()) {
throw Exception(errors::NoSecondaryFiles)
<< "RootEmbeddedFileSequence no input files specified for secondary input source.\n";
Expand Down Expand Up @@ -138,7 +139,8 @@ namespace edm {
currentIndexIntoFile,
orderedProcessHistoryIDs_,
input_.bypassVersionCheck(),
enablePrefetching_);
enablePrefetching_,
enforceGUIDInFileName_);
}

void RootEmbeddedFileSequence::skipEntries(unsigned int offset) {
Expand Down Expand Up @@ -336,5 +338,9 @@ namespace edm {
"Skip the first 'skipEvents' events. Used only if 'sequential' is True and 'sameLumiBlock' is False");
desc.addUntracked<unsigned int>("cacheSize", roottree::defaultCacheSize)
->setComment("Size of ROOT TTree prefetch cache. Affects performance.");
desc.addUntracked<bool>("enforceGUIDInFileName", false)
->setComment(
"True: file name part is required to be equal to the GUID of the file\n"
"False: file name can be anything");
}
} // namespace edm
1 change: 1 addition & 0 deletions IOPool/Input/src/RootEmbeddedFileSequence.h
Expand Up @@ -70,6 +70,7 @@ namespace edm {
int initialNumberOfEventsToSkip_;
unsigned int treeCacheSize_;
bool enablePrefetching_;
bool enforceGUIDInFileName_;
}; // class RootEmbeddedFileSequence
} // namespace edm
#endif
13 changes: 12 additions & 1 deletion IOPool/Input/src/RootFile.cc
Expand Up @@ -43,6 +43,7 @@
#include "FWCore/Utilities/interface/FriendlyName.h"
#include "FWCore/Utilities/interface/GlobalIdentifier.h"
#include "FWCore/Utilities/interface/ReleaseVersion.h"
#include "FWCore/Utilities/interface/stemFromPath.h"
#include "FWCore/Version/interface/GetReleaseVersion.h"
#include "IOPool/Common/interface/getWrapperBasePtr.h"

Expand Down Expand Up @@ -164,7 +165,8 @@ namespace edm {
bool bypassVersionCheck,
bool labelRawDataLikeMC,
bool usingGoToEvent,
bool enablePrefetching)
bool enablePrefetching,
bool enforceGUIDInFileName)
: file_(fileName),
logicalFile_(logicalFileName),
processConfiguration_(processConfiguration),
Expand All @@ -187,6 +189,7 @@ namespace edm {
savedRunAuxiliary_(),
skipAnyEvents_(skipAnyEvents),
noEventSort_(noEventSort),
enforceGUIDInFileName_(enforceGUIDInFileName),
whyNotFastClonable_(0),
hasNewlyDroppedBranch_(),
branchListIndexesUnchanged_(false),
Expand Down Expand Up @@ -1139,6 +1142,14 @@ namespace edm {
throw Exception(errors::EventCorruption) << "'Events' tree is corrupted or not present\n"
<< "in the input file.\n";
}
if (enforceGUIDInFileName_) {
auto guidFromName = stemFromPath(file_);
if (guidFromName != fid_.fid()) {
throw edm::Exception(edm::errors::FileNameInconsistentWithGUID)
<< "GUID " << guidFromName << " extracted from file name " << file_
<< " is inconsistent with the GUID read from the file " << fid_.fid();
}
}

if (fileFormatVersion().hasIndexIntoFile()) {
if (runTree().entries() > 0) {
Expand Down
16 changes: 11 additions & 5 deletions IOPool/Input/src/RootFile.h
Expand Up @@ -91,7 +91,8 @@ namespace edm {
bool bypassVersionCheck,
bool labelRawDataLikeMC,
bool usingGoToEvent,
bool enablePrefetching);
bool enablePrefetching,
bool enforceGUIDInFileName);

RootFile(std::string const& fileName,
ProcessConfiguration const& processConfiguration,
Expand All @@ -113,7 +114,8 @@ namespace edm {
std::vector<ProcessHistoryID>& orderedProcessHistoryIDs,
bool bypassVersionCheck,
bool labelRawDataLikeMC,
bool enablePrefetching)
bool enablePrefetching,
bool enforceGUIDInFileName)
: RootFile(fileName,
processConfiguration,
logicalFileName,
Expand Down Expand Up @@ -142,7 +144,8 @@ namespace edm {
bypassVersionCheck,
labelRawDataLikeMC,
false,
enablePrefetching) {}
enablePrefetching,
enforceGUIDInFileName) {}

RootFile(std::string const& fileName,
ProcessConfiguration const& processConfiguration,
Expand All @@ -159,7 +162,8 @@ namespace edm {
std::vector<std::shared_ptr<IndexIntoFile>>::size_type currentIndexIntoFile,
std::vector<ProcessHistoryID>& orderedProcessHistoryIDs,
bool bypassVersionCheck,
bool enablePrefetching)
bool enablePrefetching,
bool enforceGUIDInFileName)
: RootFile(fileName,
processConfiguration,
logicalFileName,
Expand Down Expand Up @@ -188,7 +192,8 @@ namespace edm {
bypassVersionCheck,
false,
false,
enablePrefetching) {}
enablePrefetching,
enforceGUIDInFileName) {}

~RootFile();

Expand Down Expand Up @@ -325,6 +330,7 @@ namespace edm {
edm::propagate_const<std::shared_ptr<RunAuxiliary>> savedRunAuxiliary_;
bool skipAnyEvents_;
bool noEventSort_;
bool enforceGUIDInFileName_;
int whyNotFastClonable_;
std::array<bool, NumBranchTypes> hasNewlyDroppedBranch_;
bool branchListIndexesUnchanged_;
Expand Down
10 changes: 8 additions & 2 deletions IOPool/Input/src/RootPrimaryFileSequence.cc
Expand Up @@ -32,7 +32,8 @@ namespace edm {
treeCacheSize_(noEventSort_ ? pset.getUntrackedParameter<unsigned int>("cacheSize") : 0U),
duplicateChecker_(new DuplicateChecker(pset)),
usingGoToEvent_(false),
enablePrefetching_(false) {
enablePrefetching_(false),
enforceGUIDInFileName_(pset.getUntrackedParameter<bool>("enforceGUIDInFileName")) {
// The SiteLocalConfig controls the TTreeCache size and the prefetching settings.
Service<SiteLocalConfig> pSLC;
if (pSLC.isAvailable()) {
Expand Down Expand Up @@ -137,7 +138,8 @@ namespace edm {
input_.bypassVersionCheck(),
input_.labelRawDataLikeMC(),
usingGoToEvent_,
enablePrefetching_);
enablePrefetching_,
enforceGUIDInFileName_);
}

bool RootPrimaryFileSequence::nextFile() {
Expand Down Expand Up @@ -322,6 +324,10 @@ namespace edm {
->setComment(
"'strict': Branches in each input file must match those in the first file.\n"
"'permissive': Branches in each input file may be any subset of those in the first file.");
desc.addUntracked<bool>("enforceGUIDInFileName", false)
->setComment(
"True: file name part is required to be equal to the GUID of the file\n"
"False: file name can be anything");

EventSkipperByID::fillDescription(desc);
DuplicateChecker::fillDescription(desc);
Expand Down
1 change: 1 addition & 0 deletions IOPool/Input/src/RootPrimaryFileSequence.h
Expand Up @@ -77,6 +77,7 @@ namespace edm {
edm::propagate_const<std::shared_ptr<DuplicateChecker>> duplicateChecker_;
bool usingGoToEvent_;
bool enablePrefetching_;
bool enforceGUIDInFileName_;
}; // class RootPrimaryFileSequence
} // namespace edm
#endif
9 changes: 7 additions & 2 deletions IOPool/Input/src/RootSecondaryFileSequence.cc
Expand Up @@ -21,7 +21,11 @@ namespace edm {
RootSecondaryFileSequence::RootSecondaryFileSequence(ParameterSet const& pset,
PoolSource& input,
InputFileCatalog const& catalog)
: RootInputFileSequence(pset, catalog), input_(input), orderedProcessHistoryIDs_(), enablePrefetching_(false) {
: RootInputFileSequence(pset, catalog),
input_(input),
orderedProcessHistoryIDs_(),
enablePrefetching_(false),
enforceGUIDInFileName_(pset.getUntrackedParameter<bool>("enforceGUIDInFileName")) {
// The SiteLocalConfig controls the TTreeCache size and the prefetching settings.
Service<SiteLocalConfig> pSLC;
if (pSLC.isAvailable()) {
Expand Down Expand Up @@ -85,7 +89,8 @@ namespace edm {
orderedProcessHistoryIDs_,
input_.bypassVersionCheck(),
input_.labelRawDataLikeMC(),
enablePrefetching_);
enablePrefetching_,
enforceGUIDInFileName_);
}

void RootSecondaryFileSequence::initAssociationsFromSecondary(std::set<BranchID> const& associationsFromSecondary) {
Expand Down
1 change: 1 addition & 0 deletions IOPool/Input/src/RootSecondaryFileSequence.h
Expand Up @@ -45,6 +45,7 @@ namespace edm {
std::vector<BranchID> associationsFromSecondary_;
std::vector<ProcessHistoryID> orderedProcessHistoryIDs_;
bool enablePrefetching_;
bool enforceGUIDInFileName_;
}; // class RootSecondaryFileSequence
} // namespace edm
#endif
29 changes: 29 additions & 0 deletions IOPool/Input/test/PoolGUIDTest_cfg.py
@@ -0,0 +1,29 @@
# Configuration file for PoolInputTest

import FWCore.ParameterSet.Config as cms
import sys

argv = []
foundpy = False
for a in sys.argv:
if foundpy:
argv.append(a)
if ".py" in a:
foundpy = True

process = cms.Process("TESTRECO")
process.load("FWCore.Framework.test.cmsExceptionsFatal_cff")

process.maxEvents.input = -1
process.OtherThing = cms.EDProducer("OtherThingProducer")
process.Analysis = cms.EDAnalyzer("OtherThingAnalyzer")

process.source = cms.Source("PoolSource",
setRunNumber = cms.untracked.uint32(621),
fileNames = cms.untracked.vstring(argv[0]),
enforceGUIDInFileName = cms.untracked.bool(True)
)

process.p = cms.Path(process.OtherThing*process.Analysis)