Switch FastTimerService to using a local thread observer #33261

Merged
merged 4 commits on Mar 26, 2021
61 changes: 54 additions & 7 deletions HLTrigger/Timer/plugins/FastTimerService.cc
@@ -34,6 +34,7 @@ using json = nlohmann::json;
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/Utilities/interface/Exception.h"
#include "FWCore/Utilities/interface/StreamID.h"
#include "HLTrigger/Timer/interface/memory_usage.h"
#include "HLTrigger/Timer/interface/processor_model.h"
@@ -766,7 +767,8 @@ void FastTimerService::PlotsPerJob::fill_lumi(AtomicResources const& data, unsig
///////////////////////////////////////////////////////////////////////////////

FastTimerService::FastTimerService(const edm::ParameterSet& config, edm::ActivityRegistry& registry)
: // configuration
: tbb::task_scheduler_observer(true),
// configuration
callgraph_(),
// job configuration
concurrent_lumis_(0),
@@ -1099,6 +1101,7 @@ void FastTimerService::postSourceLumi(edm::LuminosityBlockIndex index) {
}

void FastTimerService::postEndJob() {
guard_.finalize();
if (print_job_summary_) {
edm::LogVerbatim out("FastReport");
printSummary(out, job_summary_, "Job");
@@ -1662,17 +1665,61 @@ void FastTimerService::postModuleStreamEndLumi(edm::StreamContext const& sc, edm
thread().measure_and_accumulate(lumi_transition_[index]);
}

void FastTimerService::on_scheduler_entry(bool worker) {
// initialise the measurement point for a thread that has newly joined the TBB pool
thread().measure();
FastTimerService::ThreadGuard::ThreadGuard() {
auto err = ::pthread_key_create(&key_, retire_thread);
Contributor:
If I understand correctly, retire_thread is called when the worker thread exits.
Why not call it from on_scheduler_exit instead?

Author:

When monitoring the primary arena, on_scheduler_entry and on_scheduler_exit get called many times as threads move between different arenas, and there's no way to tell when a call to on_scheduler_exit is the final exit. This is the only reliable way I could think of to catch a thread being deleted before the end-of-job sequence.

Contributor:

OK, so with this approach, the time (and resources) spent by a thread outside the main arena would be accounted as "overhead", right?
Which is not a bad thing.

Author:

I believe that with either the global observer or this PR, what gets measured as overhead is the time outside of any defined CMS module, irrespective of the arena. With the global observer you were getting one call to on_scheduler_entry and on_scheduler_exit per thread, and I'm trying to replicate that with the observer on the primary arena.

if (err) {
throw cms::Exception("FastTimerService") << "ThreadGuard key creation failed: " << ::strerror(err);
}
}

// If this is a new thread, register it and return true
bool FastTimerService::ThreadGuard::register_thread(FastTimerService::AtomicResources& r) {
auto ptr = ::pthread_getspecific(key_);

if (not ptr) {
auto p = thread_resources_.emplace_back(std::make_unique<specific_t>(r));
auto err = ::pthread_setspecific(key_, p->get());
if (err) {
throw cms::Exception("FastTimerService") << "ThreadGuard pthread_setspecific failed: " << ::strerror(err);
}
return true;
}
return false;
}

void FastTimerService::on_scheduler_exit(bool worker) {
// called when a thread exits
void FastTimerService::ThreadGuard::retire_thread(void* ptr) {
auto p = static_cast<specific_t*>(ptr);
// account any resources used or freed by the thread before leaving the TBB pool
thread().measure_and_accumulate(overhead_);
p->measurement_.measure_and_accumulate(p->resource_);
p->live_ = false;
}

FastTimerService::Measurement& FastTimerService::thread() { return threads_.local(); }
// finalize all threads that have not retired
void FastTimerService::ThreadGuard::finalize() {
for (auto& p : thread_resources_) {
if (p->live_) {
p->measurement_.measure_and_accumulate(p->resource_);
}
}
}

FastTimerService::Measurement& FastTimerService::ThreadGuard::thread() {
auto ptr = ::pthread_getspecific(key_);
auto p = static_cast<ThreadGuard::specific_t*>(ptr);
return p->measurement_;
}

void FastTimerService::on_scheduler_entry(bool worker) {
if (guard_.register_thread(overhead_)) {
// initialise the measurement point for a thread that has newly joined the TBB pool
thread().measure();
}
}

void FastTimerService::on_scheduler_exit(bool worker) {}

FastTimerService::Measurement& FastTimerService::thread() { return guard_.thread(); }

// describe the module's configuration
void FastTimerService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
30 changes: 27 additions & 3 deletions HLTrigger/Timer/plugins/FastTimerService.h
@@ -3,6 +3,7 @@

// system headers
#include <unistd.h>
#include <pthread.h>

// C++ headers
#include <chrono>
@@ -455,9 +456,32 @@ class FastTimerService : public tbb::task_scheduler_observer {
std::vector<ResourcesPerJob> run_summary_; // whole event time accounting per-run
std::mutex summary_mutex_; // synchronise access to the summary objects across different threads

// per-thread quantities, lazily allocated
tbb::enumerable_thread_specific<Measurement, tbb::cache_aligned_allocator<Measurement>, tbb::ets_key_per_instance>
threads_;
//
struct ThreadGuard {
struct specific_t {
specific_t(AtomicResources& r) : resource_(r), live_(true) {}
~specific_t() = default;

Measurement measurement_;
AtomicResources& resource_;
bool live_;
};

ThreadGuard();
~ThreadGuard() = default;

static void retire_thread(void* t);

bool register_thread(FastTimerService::AtomicResources& r);
Measurement& thread();
void finalize();

tbb::concurrent_vector<std::unique_ptr<specific_t>> thread_resources_;
Contributor:

why a tbb::concurrent_vector<std::unique_ptr<specific_t>>?
wouldn't tbb::concurrent_vector<specific_t> also work, and avoid one indirection?

Author:

Actually it needs to be std::shared_ptr. Otherwise there's a potential use-after-free if the threads exit after the FastTimerService gets destructed. We've observed that TBB often doesn't destroy threads until global destructors are called (I believe using the global observer you may actually have been missing some overhead from on_scheduler_exit getting called after postEndJob).

pthread_key_t key_;
};

//
ThreadGuard guard_;

// atomic variables to keep track of the completion of each step, process by process
std::unique_ptr<std::atomic<unsigned int>[]> subprocess_event_check_;