diff --git a/HLTrigger/Timer/interface/ProcessCallGraph.h b/HLTrigger/Timer/interface/ProcessCallGraph.h index 5b7fcea82ef75..f272a539e67c2 100644 --- a/HLTrigger/Timer/interface/ProcessCallGraph.h +++ b/HLTrigger/Timer/interface/ProcessCallGraph.h @@ -120,7 +120,7 @@ class ProcessCallGraph { public: // default c'tor - ProcessCallGraph(); + ProcessCallGraph() = default; // to be called from preSourceConstruction(...) void preSourceConstruction(edm::ModuleDescription const &); @@ -172,7 +172,7 @@ class ProcessCallGraph { GraphType graph_; // module id of the Source - unsigned int source_; + unsigned int source_ = edm::ModuleDescription::invalidID(); // map each (sub)process name to a "process id" std::unordered_map process_id_; diff --git a/HLTrigger/Timer/src/ProcessCallGraph.cc b/HLTrigger/Timer/src/ProcessCallGraph.cc index 7250ca4479a3d..53421c9c1b29c 100644 --- a/HLTrigger/Timer/src/ProcessCallGraph.cc +++ b/HLTrigger/Timer/src/ProcessCallGraph.cc @@ -29,8 +29,6 @@ #include "FWCore/Utilities/interface/EDMException.h" #include "HLTrigger/Timer/interface/ProcessCallGraph.h" -ProcessCallGraph::ProcessCallGraph() = default; - // adaptor to use range-based for loops with boost::graph edges(...) and vertices(...) functions template struct iterator_pair_as_a_range : std::pair { @@ -46,9 +44,10 @@ iterator_pair_as_a_range make_range(std::pair p) { return iterator_pair_as_a_range(p); } -// FIXME -// - check that the Source has not already been added void ProcessCallGraph::preSourceConstruction(edm::ModuleDescription const& module) { + // check that the Source has not already been added + assert(source_ == edm::ModuleDescription::invalidID()); + // keep track of the Source module id source_ = module.id(); @@ -58,13 +57,15 @@ void ProcessCallGraph::preSourceConstruction(edm::ModuleDescription const& modul } // FIXME -// - check that the Source has already been added // - check that all module ids are valid (e.g. 
subprocesses are not being added in // the wrong order) void ProcessCallGraph::preBeginJob(edm::PathsAndConsumesOfModulesBase const& pathsAndConsumes, edm::ProcessContext const& context) { unsigned int pid = registerProcess(context); + // check that the Source has already been added + assert(source_ != edm::ModuleDescription::invalidID()); + // work on the full graph (for the main process) or a subgraph (for a subprocess) GraphType& graph = context.isSubProcess() ? graph_.create_subgraph() : graph_.root(); @@ -227,10 +228,8 @@ std::pair, std::vector> ProcessCallGraph } // register a (sub)process and assigns it a "process id" -// if called with a duplicate process name, returns the original process id +// throws an exception if called with a duplicate process name unsigned int ProcessCallGraph::registerProcess(edm::ProcessContext const& context) { - static unsigned int s_id = 0; - // registerProcess (called by preBeginJob) must be called for the parent process before its subprocess(es) if (context.isSubProcess() and process_id_.find(context.parentProcessContext().processName()) == process_id_.end()) { throw edm::Exception(edm::errors::LogicError) @@ -246,7 +245,9 @@ unsigned int ProcessCallGraph::registerProcess(edm::ProcessContext const& contex << (context.isSubProcess() ? 
"subprocess" : "process") << " " << context.processName(); } - std::tie(id, std::ignore) = process_id_.insert(std::make_pair(context.processName(), s_id++)); + // this assumes that registerProcess (called by preBeginJob) is not called concurrently from different threads + // otherwise, process_id_.size() should be replaced with an atomic counter + std::tie(id, std::ignore) = process_id_.insert(std::make_pair(context.processName(), process_id_.size())); return id->second; } diff --git a/HeterogeneousCore/CUDAServices/plugins/BuildFile.xml b/HeterogeneousCore/CUDAServices/plugins/BuildFile.xml index b9bd22319cc8c..f6b5f0a63fb12 100644 --- a/HeterogeneousCore/CUDAServices/plugins/BuildFile.xml +++ b/HeterogeneousCore/CUDAServices/plugins/BuildFile.xml @@ -7,6 +7,7 @@ + diff --git a/HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc b/HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc index 99960a719dab5..5cbf1819618b4 100644 --- a/HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc +++ b/HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc @@ -41,6 +41,7 @@ #include "FWCore/Utilities/interface/ProductKindOfType.h" #include "FWCore/Utilities/interface/TimeOfDay.h" #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HLTrigger/Timer/interface/ProcessCallGraph.h" using namespace std::string_literals; @@ -287,6 +288,9 @@ class NVProfilerService { return highlight(label) ? 
nvtxLightAmber : nvtxLightGreen; } + // build a complete representation of the modules in the whole job + ProcessCallGraph callgraph_; + std::vector highlightModules_; const bool showModulePrefetching_; const bool skipFirstEvent_; @@ -502,7 +506,7 @@ void NVProfilerService::preallocate(edm::service::SystemBounds const& bounds) { std::stringstream out; out << "preallocate: " << bounds.maxNumberOfConcurrentRuns() << " concurrent runs, " << bounds.maxNumberOfConcurrentLuminosityBlocks() << " luminosity sections, " << bounds.maxNumberOfStreams() - << " streams\nrunning on" << bounds.maxNumberOfThreads() << " threads"; + << " streams\nrunning on " << bounds.maxNumberOfThreads() << " threads"; nvtxDomainMark(global_domain_, out.str().c_str()); auto concurrentStreams = bounds.maxNumberOfStreams(); @@ -524,12 +528,13 @@ void NVProfilerService::preallocate(edm::service::SystemBounds const& bounds) { } void NVProfilerService::preBeginJob(edm::PathsAndConsumesOfModulesBase const& pathsAndConsumes, - edm::ProcessContext const& pc) { + edm::ProcessContext const& context) { + callgraph_.preBeginJob(pathsAndConsumes, context); nvtxDomainMark(global_domain_, "preBeginJob"); - // FIXME this probably works only in the absence of subprocesses - // size() + 1 because pathsAndConsumes.allModules() does not include the source - unsigned int modules = pathsAndConsumes.allModules().size() + 1; + // this assumes that preBeginJob is not called concurrently with the modules' beginJob method + // or the preBeginJob for a subprocess + unsigned int modules = callgraph_.size(); global_modules_.resize(modules, nvtxInvalidRangeId); for (unsigned int sid = 0; sid < stream_modules_.size(); ++sid) { stream_modules_[sid].resize(modules, nvtxInvalidRangeId); @@ -1115,6 +1120,8 @@ void NVProfilerService::postModuleGlobalEndLumi(edm::GlobalContext const& gc, ed } void NVProfilerService::preSourceConstruction(edm::ModuleDescription const& desc) { + callgraph_.preSourceConstruction(desc); + if (not 
skipFirstEvent_) { auto mid = desc.id(); global_modules_.grow_to_at_least(mid + 1);