Improved PFID model selection consistency.

The model index used to evaluate the candidate is now saved in the DNNHelper output and used in the producer to select how many DNN outputs should be saved, without performing again the pt/eta binning. Moreover the eta selection is now performed with SuperCluster.eta instead of Electron.eta.
cms-sw · Jun 13, 2022 · acc34c7 · acc34c7
1 parent 9c0c506
commit acc34c7
Show file tree

Hide file tree

Showing 9 changed files with 30 additions and 25 deletions.
diff --git a/RecoEgamma/EgammaElectronProducers/plugins/GsfElectronProducer.cc b/RecoEgamma/EgammaElectronProducers/plugins/GsfElectronProducer.cc
@@ -56,17 +56,19 @@ namespace {
       const auto& dnn_ele_pfid = hoc->iElectronDNNEstimator->evaluate(electrons, tfSessions);
       int jele = 0;
       for (auto& el : electrons) {
-        const auto& values = dnn_ele_pfid[jele];
+        const auto& [iModel, values] = dnn_ele_pfid[jele];
         // get the previous values
         auto& mvaOutput = mva_outputs[jele];
 
-        if (abs(el.superCluster()->eta()) <= extetaboundary) {
+        if (iModel <= 3) {  // models 0,1,2,3 have 5 outputs in this version
+          assert(values.size() == 5);
           mvaOutput.dnn_e_sigIsolated = values[0];
           mvaOutput.dnn_e_sigNonIsolated = values[1];
           mvaOutput.dnn_e_bkgNonIsolated = values[2];
           mvaOutput.dnn_e_bkgTau = values[3];
           mvaOutput.dnn_e_bkgPhoton = values[4];
-        } else {
+        } else if (iModel == 4) {  //etaExtended model has 3 outputs
+          assert(values.size() == 3);
           mvaOutput.dnn_e_sigIsolated = values[0];
           mvaOutput.dnn_e_sigNonIsolated = 0.0;
           mvaOutput.dnn_e_bkgNonIsolated = values[1];

diff --git a/RecoEgamma/EgammaElectronProducers/python/gedGsfElectrons_cfi.py b/RecoEgamma/EgammaElectronProducers/python/gedGsfElectrons_cfi.py
@@ -36,8 +36,8 @@
             "RecoEgamma/ElectronIdentification/data/Ele_PFID_dnn/Run3Summer21_120X/EE_highpT/endcap_highpT_scaler.txt",
             "RecoEgamma/ElectronIdentification/data/Ele_PFID_dnn/Run3Winter22_122X/exteta1/scaler.txt",
             "RecoEgamma/ElectronIdentification/data/Ele_PFID_dnn/Run3Winter22_122X/exteta2/scaler.txt"
-
-        ]
+        ],
+        outputDim = [5,5,5,5,3]
     )    
 )
 

diff --git a/RecoEgamma/EgammaPhotonProducers/src/GEDPhotonProducer.cc b/RecoEgamma/EgammaPhotonProducers/src/GEDPhotonProducer.cc
@@ -1030,8 +1030,9 @@ void GEDPhotonProducer::fillPhotonCollection(edm::Event& evt,
     const auto& dnn_photon_pfid = globalCache()->photonDNNEstimator->evaluate(outputPhotonCollection, tfSessions_);
     size_t ipho = 0;
     for (auto& photon : outputPhotonCollection) {
-      const auto& values = dnn_photon_pfid[ipho];
+      const auto& [iModel, values] = dnn_photon_pfid[ipho];
       reco::Photon::PflowIDVariables pfID;
+      // The model index it is not useful for the moment
       pfID.dnn = values[0];
       photon.setPflowIDVariables(pfID);
       ipho++;

diff --git a/RecoEgamma/EgammaTools/interface/EgammaDNNHelper.h b/RecoEgamma/EgammaTools/interface/EgammaDNNHelper.h
@@ -49,8 +49,9 @@ namespace egammaTools {
     // which has access to all the variables.
     std::pair<uint, std::vector<float>> getScaledInputs(const std::map<std::string, float>& variables) const;
 
-    std::vector<std::vector<float>> evaluate(const std::vector<std::map<std::string, float>>& candidates,
-                                             const std::vector<tensorflow::Session*>& sessions) const;
+    std::vector<std::pair<uint, std::vector<float>>> evaluate(
+        const std::vector<std::map<std::string, float>>& candidates,
+        const std::vector<tensorflow::Session*>& sessions) const;
 
   private:
     void initTensorFlowGraphs();

diff --git a/RecoEgamma/EgammaTools/src/EgammaDNNHelper.cc b/RecoEgamma/EgammaTools/src/EgammaDNNHelper.cc
@@ -97,8 +97,9 @@ std::pair<uint, std::vector<float>> EgammaDNNHelper::getScaledInputs(
   return std::make_pair(modelIndex, inputs);
 }
 
-std::vector<std::vector<float>> EgammaDNNHelper::evaluate(const std::vector<std::map<std::string, float>>& candidates,
-                                                          const std::vector<tensorflow::Session*>& sessions) const {
+std::vector<std::pair<uint, std::vector<float>>> EgammaDNNHelper::evaluate(
+    const std::vector<std::map<std::string, float>>& candidates,
+    const std::vector<tensorflow::Session*>& sessions) const {
   /*
     Evaluate the PFID DNN for all the electrons/photons. 
     nModels_ are defined depending on modelIndex  --> we need to build N input tensors to evaluate
@@ -109,17 +110,17 @@ std::vector<std::vector<float>> EgammaDNNHelper::evaluate(const std::vector<std:
     2) Prepare the input tensors for the  models
     3) Run the models and get the output for each candidate
     4) Sort the output by candidate index
-    5) Return the DNN outputs 
+    5) Return the DNN outputs along with the model index used on it
 
     */
   size_t nCandidates = candidates.size();
-  std::vector<std::vector<int>> indexMap(nModels_);  // for each model; the list of candidate index is saved
+  std::vector<std::vector<uint>> indexMap(nModels_);  // for each model; the list of candidate index is saved
   std::vector<std::vector<float>> inputsVectors(nCandidates);
   std::vector<uint> counts(nModels_);
 
   LogDebug("EgammaDNNHelper") << "Working on " << nCandidates << " candidates";
 
-  int icand = 0;
+  uint icand = 0;
   for (auto& candidate : candidates) {
     LogDebug("EgammaDNNHelper") << "Working on candidate: " << icand;
     const auto& [model_index, inputs] = getScaledInputs(candidate);
@@ -152,8 +153,8 @@ std::vector<std::vector<float>> EgammaDNNHelper::evaluate(const std::vector<std:
   }
 
   // Define the output and run
-  // Define the output and run
-  std::vector<std::pair<int, std::vector<float>>> outputs;
+  // The initial output is [(cand_index,(model_index, outputs)),.. ]
+  std::vector<std::pair<uint, std::pair<uint, std::vector<float>>>> outputs;
   // Run all the models
   for (size_t m = 0; m < nModels_; m++) {
     if (counts[m] == 0)
@@ -174,12 +175,12 @@ std::vector<std::vector<float>> EgammaDNNHelper::evaluate(const std::vector<std:
       }
       // Get the original index of the electorn in the original order
       const auto cand_index = indexMap[m][b];
-      outputs.push_back(std::make_pair(cand_index, result));
+      outputs.push_back(std::make_pair(cand_index, std::make_pair(m, result)));
     }
   }
   // Now we have just to re-order the outputs
   std::sort(outputs.begin(), outputs.end());
-  std::vector<std::vector<float>> final_outputs(outputs.size());
+  std::vector<std::pair<uint, std::vector<float>>> final_outputs(outputs.size());
   std::transform(outputs.begin(), outputs.end(), final_outputs.begin(), [](auto a) { return a.second; });
 
   return final_outputs;

diff --git a/RecoEgamma/ElectronIdentification/interface/ElectronDNNEstimator.h b/RecoEgamma/ElectronIdentification/interface/ElectronDNNEstimator.h
@@ -20,8 +20,8 @@ class ElectronDNNEstimator {
   std::map<std::string, float> getInputsVars(const reco::GsfElectron& ele) const;
 
   // Evaluate the DNN on all the electrons with the correct model
-  std::vector<std::vector<float>> evaluate(const reco::GsfElectronCollection& ele,
-                                           const std::vector<tensorflow::Session*>& sessions) const;
+  std::vector<std::pair<uint, std::vector<float>>> evaluate(const reco::GsfElectronCollection& ele,
+                                                            const std::vector<tensorflow::Session*>& sessions) const;
 
   // List of input variables names used to check the variables request as
   // inputs in a dynamic way from configuration file.

diff --git a/RecoEgamma/ElectronIdentification/src/ElectronDNNEstimator.cc b/RecoEgamma/ElectronIdentification/src/ElectronDNNEstimator.cc
@@ -15,7 +15,7 @@ inline uint electronModelSelector(
   Selection of the model to be applied on the electron based on pt/eta cuts or whatever selection
   */
   const auto pt = vars.at("pt");
-  const auto absEta = std::abs(vars.at("eta"));
+  const auto absEta = std::abs(vars.at("superCluster.eta"));
   if (absEta <= endcapBoundary) {
     if (pt < ptThr)
       return 0;
@@ -156,7 +156,7 @@ std::map<std::string, float> ElectronDNNEstimator::getInputsVars(const reco::Gsf
   return variables;
 }
 
-std::vector<std::vector<float>> ElectronDNNEstimator::evaluate(
+std::vector<std::pair<uint, std::vector<float>>> ElectronDNNEstimator::evaluate(
     const reco::GsfElectronCollection& electrons, const std::vector<tensorflow::Session*>& sessions) const {
   // Collect the map of variables for each candidate and call the dnnHelper
   // Scaling, model selection and running is performed in the helper

diff --git a/RecoEgamma/PhotonIdentification/interface/PhotonDNNEstimator.h b/RecoEgamma/PhotonIdentification/interface/PhotonDNNEstimator.h
@@ -21,8 +21,8 @@ class PhotonDNNEstimator {
   std::map<std::string, float> getInputsVars(const reco::Photon& ele) const;
 
   // Evaluate the DNN on all the electrons with the correct model
-  std::vector<std::vector<float>> evaluate(const reco::PhotonCollection& ele,
-                                           const std::vector<tensorflow::Session*>& sessions) const;
+  std::vector<std::pair<uint, std::vector<float>>> evaluate(const reco::PhotonCollection& ele,
+                                                            const std::vector<tensorflow::Session*>& sessions) const;
 
   // List of input variables names used to check the variables request as
   // inputs in a dynamic way from configuration file.

diff --git a/RecoEgamma/PhotonIdentification/src/PhotonDNNEstimator.cc b/RecoEgamma/PhotonIdentification/src/PhotonDNNEstimator.cc
@@ -66,8 +66,8 @@ std::map<std::string, float> PhotonDNNEstimator::getInputsVars(const reco::Photo
   return variables;
 }
 
-std::vector<std::vector<float>> PhotonDNNEstimator::evaluate(const reco::PhotonCollection& photons,
-                                                             const std::vector<tensorflow::Session*>& sessions) const {
+std::vector<std::pair<uint, std::vector<float>>> PhotonDNNEstimator::evaluate(
+    const reco::PhotonCollection& photons, const std::vector<tensorflow::Session*>& sessions) const {
   // Collect the map of variables for each candidate and call the dnnHelper
   // Scaling, model selection and running is performed in the helper
   std::vector<std::map<std::string, float>> inputs;