Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the NVProfilerService and ProcessCallGraph [12.4.x] #39399

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions HLTrigger/Timer/interface/ProcessCallGraph.h
Expand Up @@ -120,7 +120,7 @@ class ProcessCallGraph {

public:
// default c'tor
ProcessCallGraph();
ProcessCallGraph() = default;

// to be called from preSourceConstruction(...)
void preSourceConstruction(edm::ModuleDescription const &);
Expand Down Expand Up @@ -172,7 +172,7 @@ class ProcessCallGraph {
GraphType graph_;

// module id of the Source
unsigned int source_;
unsigned int source_ = edm::ModuleDescription::invalidID();

// map each (sub)process name to a "process id"
std::unordered_map<std::string, unsigned int> process_id_;
Expand Down
19 changes: 10 additions & 9 deletions HLTrigger/Timer/src/ProcessCallGraph.cc
Expand Up @@ -29,8 +29,6 @@
#include "FWCore/Utilities/interface/EDMException.h"
#include "HLTrigger/Timer/interface/ProcessCallGraph.h"

ProcessCallGraph::ProcessCallGraph() = default;

// adaptor to use range-based for loops with boost::graph edges(...) and vertices(...) functions
template <typename I>
struct iterator_pair_as_a_range : std::pair<I, I> {
Expand All @@ -46,9 +44,10 @@ iterator_pair_as_a_range<I> make_range(std::pair<I, I> p) {
return iterator_pair_as_a_range<I>(p);
}

// FIXME
// - check that the Source has not already been added
void ProcessCallGraph::preSourceConstruction(edm::ModuleDescription const& module) {
// check that the Source has not already been added
assert(source_ == edm::ModuleDescription::invalidID());

// keep track of the Source module id
source_ = module.id();

Expand All @@ -58,13 +57,15 @@ void ProcessCallGraph::preSourceConstruction(edm::ModuleDescription const& modul
}

// FIXME
// - check that the Source has already been added
// - check that all module ids are valid (e.g. subprocesses are not being added in
// the wrong order)
void ProcessCallGraph::preBeginJob(edm::PathsAndConsumesOfModulesBase const& pathsAndConsumes,
edm::ProcessContext const& context) {
unsigned int pid = registerProcess(context);

// check that the Source has already been added
assert(source_ != edm::ModuleDescription::invalidID());

// work on the full graph (for the main process) or a subgraph (for a subprocess)
GraphType& graph = context.isSubProcess() ? graph_.create_subgraph() : graph_.root();

Expand Down Expand Up @@ -227,10 +228,8 @@ std::pair<std::vector<unsigned int>, std::vector<unsigned int>> ProcessCallGraph
}

// registers a (sub)process and assigns it a "process id"
// if called with a duplicate process name, returns the original process id
// throws an exception if called with a duplicate process name
unsigned int ProcessCallGraph::registerProcess(edm::ProcessContext const& context) {
static unsigned int s_id = 0;

// registerProcess (called by preBeginJob) must be called for the parent process before its subprocess(es)
if (context.isSubProcess() and process_id_.find(context.parentProcessContext().processName()) == process_id_.end()) {
throw edm::Exception(edm::errors::LogicError)
Expand All @@ -246,7 +245,9 @@ unsigned int ProcessCallGraph::registerProcess(edm::ProcessContext const& contex
<< (context.isSubProcess() ? "subprocess" : "process") << " " << context.processName();
}

std::tie(id, std::ignore) = process_id_.insert(std::make_pair(context.processName(), s_id++));
// this assumes that registerProcess (called by preBeginJob) is not called concurrently from different threads
// otherwise, process_id_.size() should be replaced with an atomic counter
std::tie(id, std::ignore) = process_id_.insert(std::make_pair(context.processName(), process_id_.size()));
return id->second;
}

Expand Down
1 change: 1 addition & 0 deletions HeterogeneousCore/CUDAServices/plugins/BuildFile.xml
Expand Up @@ -7,6 +7,7 @@
<use name="FWCore/ServiceRegistry"/>
<use name="FWCore/Utilities"/>
<use name="HeterogeneousCore/CUDAServices"/>
<use name="HLTrigger/Timer"/>

<library file="*.cc" name="HeterogeneousCoreCUDAServicesPlugins">
<flags EDM_PLUGIN="1"/>
Expand Down
17 changes: 12 additions & 5 deletions HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc
Expand Up @@ -41,6 +41,7 @@
#include "FWCore/Utilities/interface/ProductKindOfType.h"
#include "FWCore/Utilities/interface/TimeOfDay.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "HLTrigger/Timer/interface/ProcessCallGraph.h"

using namespace std::string_literals;

Expand Down Expand Up @@ -287,6 +288,9 @@ class NVProfilerService {
return highlight(label) ? nvtxLightAmber : nvtxLightGreen;
}

// build a complete representation of the modules in the whole job
ProcessCallGraph callgraph_;

std::vector<std::string> highlightModules_;
const bool showModulePrefetching_;
const bool skipFirstEvent_;
Expand Down Expand Up @@ -502,7 +506,7 @@ void NVProfilerService::preallocate(edm::service::SystemBounds const& bounds) {
std::stringstream out;
out << "preallocate: " << bounds.maxNumberOfConcurrentRuns() << " concurrent runs, "
<< bounds.maxNumberOfConcurrentLuminosityBlocks() << " luminosity sections, " << bounds.maxNumberOfStreams()
<< " streams\nrunning on" << bounds.maxNumberOfThreads() << " threads";
<< " streams\nrunning on " << bounds.maxNumberOfThreads() << " threads";
nvtxDomainMark(global_domain_, out.str().c_str());

auto concurrentStreams = bounds.maxNumberOfStreams();
Expand All @@ -524,12 +528,13 @@ void NVProfilerService::preallocate(edm::service::SystemBounds const& bounds) {
}

void NVProfilerService::preBeginJob(edm::PathsAndConsumesOfModulesBase const& pathsAndConsumes,
edm::ProcessContext const& pc) {
edm::ProcessContext const& context) {
callgraph_.preBeginJob(pathsAndConsumes, context);
nvtxDomainMark(global_domain_, "preBeginJob");

// FIXME this probably works only in the absence of subprocesses
// size() + 1 because pathsAndConsumes.allModules() does not include the source
unsigned int modules = pathsAndConsumes.allModules().size() + 1;
// this assumes that preBeginJob is not called concurrently with the modules' beginJob method
// or the preBeginJob for a subprocess
unsigned int modules = callgraph_.size();
global_modules_.resize(modules, nvtxInvalidRangeId);
for (unsigned int sid = 0; sid < stream_modules_.size(); ++sid) {
stream_modules_[sid].resize(modules, nvtxInvalidRangeId);
Expand Down Expand Up @@ -1115,6 +1120,8 @@ void NVProfilerService::postModuleGlobalEndLumi(edm::GlobalContext const& gc, ed
}

void NVProfilerService::preSourceConstruction(edm::ModuleDescription const& desc) {
callgraph_.preSourceConstruction(desc);

if (not skipFirstEvent_) {
auto mid = desc.id();
global_modules_.grow_to_at_least(mid + 1);
Expand Down