Skip to content

Commit

Permalink
Merge pull request #40723 from cms-tau-pog/CMSSW_12_5_X_tau-pog_deepTauPh2
Browse files Browse the repository at this point in the history

Code to process phase2 version of deepTauID v2p5 [12_5_X]
  • Loading branch information
cmsbuild committed Feb 10, 2023
2 parents e0497b0 + b588e8c commit 1c0198a
Show file tree
Hide file tree
Showing 8 changed files with 475 additions and 27 deletions.
4 changes: 2 additions & 2 deletions PhysicsTools/PatAlgos/python/slimming/miniAOD_tools.py
Expand Up @@ -382,8 +382,8 @@ def _add_deepFlavour(process):
toKeep = ['deepTau2017v2p1','deepTau2018v2p5']
)
from Configuration.Eras.Modifier_phase2_common_cff import phase2_common #Phase2 Tau MVA
phase2_common.toModify(tauIdEmbedder.toKeep, func=lambda t:t.append('newDMPhase2v1')) #Phase2 Tau isolation MVA
phase2_common.toModify(tauIdEmbedder.toKeep, func=lambda t:t.append('againstElePhase2v1')) #Phase2 Tau anti-e MVA
_tauIds_phase2 = ['newDMPhase2v1','againstElePhase2v1']
phase2_common.toModify(tauIdEmbedder.toKeep, func=lambda t:t.extend(_tauIds_phase2))
tauIdEmbedder.runTauID()
addToProcessAndTask(_noUpdatedTauName, process.slimmedTaus.clone(),process,task)
delattr(process, 'slimmedTaus')
Expand Down
337 changes: 337 additions & 0 deletions RecoTauTag/RecoTau/interface/DeepTauScaling.h

Large diffs are not rendered by default.

45 changes: 32 additions & 13 deletions RecoTauTag/RecoTau/plugins/DeepTauId.cc
Expand Up @@ -881,6 +881,7 @@ class DeepTauId : public deep_tau::DeepTauBase {
desc.add<std::vector<std::string>>("graph_file",
{"RecoTauTag/TrainingFiles/data/DeepTauId/deepTau_2017v2p6_e6.pb"});
desc.add<bool>("mem_mapped", false);
desc.add<unsigned>("year", 2017);
desc.add<unsigned>("version", 2);
desc.add<unsigned>("sub_version", 1);
desc.add<int>("debug_level", 0);
Expand Down Expand Up @@ -926,6 +927,7 @@ class DeepTauId : public deep_tau::DeepTauBase {
pfTauTransverseImpactParameters_token_(
consumes<edm::AssociationVector<reco::PFTauRefProd, std::vector<reco::PFTauTransverseImpactParameterRef>>>(
cfg.getParameter<edm::InputTag>("pfTauTransverseImpactParameters"))),
year_(cfg.getParameter<unsigned>("year")),
version_(cfg.getParameter<unsigned>("version")),
sub_version_(cfg.getParameter<unsigned>("sub_version")),
debug_level(cfg.getParameter<int>("debug_level")),
Expand Down Expand Up @@ -957,7 +959,11 @@ class DeepTauId : public deep_tau::DeepTauBase {
tensorflow::TensorShape{1,
static_cast<int>(TauBlockInputs::NumberOfInputs) -
static_cast<int>(TauBlockInputs::varsToDrop.size())});
scalingParamsMap_ = &sc::scalingParamsMap_v2p5;
if (year_ == 2026) {
scalingParamsMap_ = &sc::scalingParamsMap_PhaseIIv2p5;
} else {
scalingParamsMap_ = &sc::scalingParamsMap_v2p5;
}
} else
throw cms::Exception("DeepTauId") << "subversion " << sub_version_ << " is not supported.";

Expand Down Expand Up @@ -1236,6 +1242,8 @@ class DeepTauId : public deep_tau::DeepTauBase {
edm::Handle<double> rho;
event.getByToken(rho_token_, rho);

auto const& eventnr = event.id().event();

tensorflow::Tensor predictions(tensorflow::DT_FLOAT, {static_cast<int>(taus->size()), deep_tau::NumberOfOutputs});

for (size_t tau_index = 0; tau_index < taus->size(); ++tau_index) {
Expand Down Expand Up @@ -1263,6 +1271,7 @@ class DeepTauId : public deep_tau::DeepTauBase {
*pfCands,
vertices->at(0),
*rho,
eventnr,
pred_vector,
tauIDs);
} else
Expand All @@ -1274,6 +1283,7 @@ class DeepTauId : public deep_tau::DeepTauBase {
*pfCands,
vertices->at(0),
*rho,
eventnr,
pred_vector,
tauIDs);
} else {
Expand Down Expand Up @@ -1310,13 +1320,15 @@ class DeepTauId : public deep_tau::DeepTauBase {
const edm::View<reco::Candidate>& pfCands,
const reco::Vertex& pv,
double rho,
const edm::EventNumber_t& eventnr,
std::vector<tensorflow::Tensor>& pred_vector,
TauFunc tau_funcs) {
using namespace dnn_inputs_v2;
if (debug_level >= 2) {
std::cout << "<DeepTauId::getPredictionsV2 (moduleLabel = " << moduleDescription().moduleLabel()
<< ")>:" << std::endl;
std::cout << " tau: pT = " << tau.pt() << ", eta = " << tau.eta() << ", phi = " << tau.phi() << std::endl;
std::cout << " tau: pT = " << tau.pt() << ", eta = " << tau.eta() << ", phi = " << tau.phi()
<< ", eventnr = " << eventnr << std::endl;
}
CellGrid inner_grid(number_of_inner_cell, number_of_inner_cell, 0.02, 0.02, disable_CellIndex_workaround_);
CellGrid outer_grid(number_of_outer_cell, number_of_outer_cell, 0.05, 0.05, disable_CellIndex_workaround_);
Expand Down Expand Up @@ -1357,7 +1369,7 @@ class DeepTauId : public deep_tau::DeepTauBase {
checkInputs(*hadronsTensor_[false], "input_outer_hadrons", HadronBlockInputs::NumberOfInputs, &outer_grid);

if (save_inputs_) {
std::string json_file_name = "DeepTauId_" + std::to_string(file_counter_) + ".json";
std::string json_file_name = "DeepTauId_" + std::to_string(eventnr) + "_" + std::to_string(tau_index) + ".json";
json_file_ = new std::ofstream(json_file_name.data());
is_first_block_ = true;
(*json_file_) << "{";
Expand Down Expand Up @@ -1942,16 +1954,22 @@ class DeepTauId : public deep_tau::DeepTauBase {
sp.scale(ele.deltaPhiSuperClusterTrackAtVtx(), dnn::ele_deltaPhiSuperClusterTrackAtVtx - e_index_offset);
get(dnn::ele_deltaPhiSeedClusterTrackAtCalo + fill_index_offset_e) =
sp.scale(ele.deltaPhiSeedClusterTrackAtCalo(), dnn::ele_deltaPhiSeedClusterTrackAtCalo - e_index_offset);
get(dnn::ele_mvaInput_earlyBrem + fill_index_offset_e) =
sp.scale(ele.mvaInput().earlyBrem, dnn::ele_mvaInput_earlyBrem - e_index_offset);
get(dnn::ele_mvaInput_lateBrem + fill_index_offset_e) =
sp.scale(ele.mvaInput().lateBrem, dnn::ele_mvaInput_lateBrem - e_index_offset);
get(dnn::ele_mvaInput_sigmaEtaEta + fill_index_offset_e) =
sp.scale(ele.mvaInput().sigmaEtaEta, dnn::ele_mvaInput_sigmaEtaEta - e_index_offset);
get(dnn::ele_mvaInput_hadEnergy + fill_index_offset_e) =
sp.scale(ele.mvaInput().hadEnergy, dnn::ele_mvaInput_hadEnergy - e_index_offset);
get(dnn::ele_mvaInput_deltaEta + fill_index_offset_e) =
sp.scale(ele.mvaInput().deltaEta, dnn::ele_mvaInput_deltaEta - e_index_offset);
const bool mva_valid =
(ele.mvaInput().earlyBrem > -2) ||
(year_ !=
2026); // Known issue that input can be invalid in Phase2 samples (early/lateBrem==-2, hadEnergy==0, sigmaEtaEta/deltaEta==3.40282e+38). Unknown if also in Run2/3, so don't change there
if (mva_valid) {
get(dnn::ele_mvaInput_earlyBrem + fill_index_offset_e) =
sp.scale(ele.mvaInput().earlyBrem, dnn::ele_mvaInput_earlyBrem - e_index_offset);
get(dnn::ele_mvaInput_lateBrem + fill_index_offset_e) =
sp.scale(ele.mvaInput().lateBrem, dnn::ele_mvaInput_lateBrem - e_index_offset);
get(dnn::ele_mvaInput_sigmaEtaEta + fill_index_offset_e) =
sp.scale(ele.mvaInput().sigmaEtaEta, dnn::ele_mvaInput_sigmaEtaEta - e_index_offset);
get(dnn::ele_mvaInput_hadEnergy + fill_index_offset_e) =
sp.scale(ele.mvaInput().hadEnergy, dnn::ele_mvaInput_hadEnergy - e_index_offset);
get(dnn::ele_mvaInput_deltaEta + fill_index_offset_e) =
sp.scale(ele.mvaInput().deltaEta, dnn::ele_mvaInput_deltaEta - e_index_offset);
}
const auto& gsfTrack = ele.gsfTrack();
if (gsfTrack.isNonnull()) {
get(dnn::ele_gsfTrack_normalizedChi2 + fill_index_offset_e) =
Expand Down Expand Up @@ -2419,6 +2437,7 @@ class DeepTauId : public deep_tau::DeepTauBase {
edm::EDGetTokenT<edm::AssociationVector<reco::PFTauRefProd, std::vector<reco::PFTauTransverseImpactParameterRef>>>
pfTauTransverseImpactParameters_token_;
std::string input_layer_, output_layer_;
const unsigned year_;
const unsigned version_;
const unsigned sub_version_;
const int debug_level;
Expand Down
@@ -1,9 +1,8 @@
import FWCore.ParameterSet.Config as cms

# Electron collection merger
mergedSlimmedElectronsForTauId = cms.EDProducer('PATElectronCollectionMerger',
src = cms.VInputTag('slimmedElectrons', 'slimmedElectronsHGC')
)
from RecoTauTag.RecoTau.mergedPhase2SlimmedElectronsForTauId_cff import mergedSlimmedElectronsForTauId

# anti-e phase-2 tauID (Raw)
from RecoTauTag.RecoTau.tauDiscriminationAgainstElectronMVA6Phase2_mvaDefs_cff import mvaNames_phase2, mapping_phase2, workingPoints_phase2
from RecoTauTag.RecoTau.TauDiscriminatorTools import noPrediscriminants
Expand Down
@@ -0,0 +1,6 @@
import FWCore.ParameterSet.Config as cms

# Electron collection merger: a 'PATElectronCollectionMerger' producer whose
# sources are the 'slimmedElectrons' and 'slimmedElectronsHGC' collections.
mergedSlimmedElectronsForTauId = cms.EDProducer(
    'PATElectronCollectionMerger',
    src = cms.VInputTag(
        'slimmedElectrons',
        'slimmedElectronsHGC',
    ),
)
29 changes: 29 additions & 0 deletions RecoTauTag/RecoTau/python/tauIdWPsDefs.py
Expand Up @@ -55,3 +55,32 @@
"VVTight": 0.9931
}
}

# Working-point cut values for the Phase-2 (2026) DeepTau v2p5 training.
# Outer keys name the discriminator target ("e", "mu", "jet"); inner keys
# name the working point and map to its cut value. Note that "mu" defines
# only four working points (VLoose .. Tight), unlike "e" and "jet".
WORKING_POINTS_PHASEII_v2p5 = {
    "e": {
        "VVVLoose": 0.2376, "VVLoose": 0.3688, "VLoose": 0.5336, "Loose": 0.8116,
        "Medium": 0.9268, "Tight": 0.9781, "VTight": 0.9915, "VVTight": 0.9961,
    },
    "mu": {
        "VLoose": 0.0640, "Loose": 0.0942, "Medium": 0.5494, "Tight": 0.9401,
    },
    "jet": {
        "VVVLoose": 0.4918, "VVLoose": 0.6920, "VLoose": 0.8299, "Loose": 0.9166,
        "Medium": 0.9507, "Tight": 0.9683, "VTight": 0.9788, "VVTight": 0.9856,
    },
}
53 changes: 47 additions & 6 deletions RecoTauTag/RecoTau/python/tools/runTauIdMVA.py
Expand Up @@ -3,7 +3,7 @@
from RecoTauTag.RecoTau.TauDiscriminatorTools import noPrediscriminants
from RecoTauTag.RecoTau.PATTauDiscriminationByMVAIsolationRun2_cff import patDiscriminationByIsolationMVArun2v1raw, patDiscriminationByIsolationMVArun2v1
from RecoTauTag.RecoTau.DeepTau_cfi import DeepTau
from RecoTauTag.RecoTau.tauIdWPsDefs import WORKING_POINTS_v2p1, WORKING_POINTS_v2p5
from RecoTauTag.RecoTau.tauIdWPsDefs import WORKING_POINTS_v2p1, WORKING_POINTS_v2p5, WORKING_POINTS_PHASEII_v2p5

import os
import re
Expand All @@ -12,7 +12,7 @@ class TauIDEmbedder(object):
"""class to rerun the tau seq and acces trainings from the database"""
availableDiscriminators = [
"2017v1", "2017v2", "newDM2017v2", "dR0p32017v2", "2016v1", "newDM2016v1",
"deepTau2017v2", "deepTau2017v2p1", "deepTau2018v2p5",
"deepTau2017v2", "deepTau2017v2p1", "deepTau2018v2p5", "deepTau2026v2p5",
"againstEle2018",
"newDMPhase2v1",
"againstElePhase2v1"
Expand All @@ -22,7 +22,7 @@ def __init__(self, process, debug = False,
originalTauName = "slimmedTaus",
updatedTauName = "slimmedTausNewID",
postfix = "",
toKeep = ["deepTau2017v2p1", "deepTau2018v2p5"],
toKeep = ["deepTau2017v2p1", "deepTau2018v2p5", "deepTau2026v2p5"],
tauIdDiscrMVA_trainings_run2_2017 = { 'tauIdMVAIsoDBoldDMwLT2017' : "tauIdMVAIsoDBoldDMwLT2017", },
tauIdDiscrMVA_WPs_run2_2017 = {
'tauIdMVAIsoDBoldDMwLT2017' : {
Expand Down Expand Up @@ -560,7 +560,7 @@ def runTauID(self):
tauIDSources.byVVTightIsolationMVArun2v1DBnewDMwLT2016 = self.tauIDMVAinputs(_byIsolationNewDMMVArun2016v1, "_WPEff40")

if "deepTau2017v2" in self.toKeep:
if self.debug: print ("Adding DeepTau IDs")
if self.debug: print ("Adding DeepTau v2 IDs")

_deepTauName = "deepTau2017v2"
workingPoints_ = WORKING_POINTS_v2p1
Expand All @@ -575,6 +575,7 @@ def runTauID(self):
Prediscriminants = noPrediscriminants,
taus = self.originalTauName,
graph_file = file_names,
year = full_version[0],
version = full_version[1],
sub_version = 1 #MB: subversion cannot be properly deduced from file names; it should be 1 also for v2
))
Expand All @@ -587,7 +588,7 @@ def runTauID(self):


if "deepTau2017v2p1" in self.toKeep:
if self.debug: print ("Adding DeepTau IDs")
if self.debug: print ("Adding DeepTau v2p1 IDs")

_deepTauName = "deepTau2017v2p1"
workingPoints_ = WORKING_POINTS_v2p1
Expand All @@ -602,6 +603,7 @@ def runTauID(self):
Prediscriminants = noPrediscriminants,
taus = self.originalTauName,
graph_file = file_names,
year = full_version[0],
version = full_version[1],
sub_version = 1, #MB: subversion cannot be properly deduced from file names
disable_dxy_pca = True
Expand All @@ -614,7 +616,7 @@ def runTauID(self):
_rerunMvaIsolationSequence += _deepTauProducer

if "deepTau2018v2p5" in self.toKeep:
if self.debug: print ("Adding DeepTau IDs")
if self.debug: print ("Adding DeepTau v2p5 IDs")

_deepTauName = "deepTau2018v2p5"
workingPoints_ = WORKING_POINTS_v2p5
Expand All @@ -629,6 +631,7 @@ def runTauID(self):
Prediscriminants = noPrediscriminants,
taus = self.originalTauName,
graph_file = file_names,
year = full_version[0],
version = full_version[1],
sub_version = full_version[2],
disable_dxy_pca = True,
Expand All @@ -642,6 +645,44 @@ def runTauID(self):
_rerunMvaIsolationTask.add(_deepTauProducer)
_rerunMvaIsolationSequence += _deepTauProducer

if "deepTau2026v2p5" in self.toKeep:
if self.debug: print ("Adding Phase2 DeepTau v2p5 IDs")

_deepTauName = "deepTau2026v2p5"
workingPoints_ = WORKING_POINTS_PHASEII_v2p5

file_names = [
'core:RecoTauTag/TrainingFiles/data/DeepTauId/deepTau_2026v2p5_core.pb',
'inner:RecoTauTag/TrainingFiles/data/DeepTauId/deepTau_2026v2p5_inner.pb',
'outer:RecoTauTag/TrainingFiles/data/DeepTauId/deepTau_2026v2p5_outer.pb',
]
full_version = self.getDeepTauVersion(file_names[0])
setattr(self.process,_deepTauName+self.postfix,DeepTau.clone(
Prediscriminants = noPrediscriminants,
taus = self.originalTauName,
graph_file = file_names,
year = full_version[0],
version = full_version[1],
sub_version = full_version[2],
disable_dxy_pca = True,
disable_hcalFraction_workaround = True,
disable_CellIndex_workaround = True
))

from RecoTauTag.RecoTau.mergedPhase2SlimmedElectronsForTauId_cff import mergedSlimmedElectronsForTauId
if not hasattr(self.process,"mergedSlimmedElectronsForTauId"):
self.process.mergedSlimmedElectronsForTauId = mergedSlimmedElectronsForTauId
setattr(getattr(self.process, _deepTauName+self.postfix), "electrons", cms.InputTag("mergedSlimmedElectronsForTauId"))
setattr(getattr(self.process, _deepTauName+self.postfix), "vertices", cms.InputTag("offlineSlimmedPrimaryVertices4D"))

self.processDeepProducer(_deepTauName, tauIDSources, workingPoints_)

_deepTauProducer = getattr(self.process,_deepTauName+self.postfix)
_rerunMvaIsolationTask.add(self.process.mergedSlimmedElectronsForTauId)
_rerunMvaIsolationTask.add(_deepTauProducer)
_rerunMvaIsolationSequence += self.process.mergedSlimmedElectronsForTauId
_rerunMvaIsolationSequence += _deepTauProducer

if "againstEle2018" in self.toKeep:
antiElectronDiscrMVA6_version = "MVA6v3_noeveto"
### Define new anti-e discriminants
Expand Down
23 changes: 20 additions & 3 deletions RecoTauTag/RecoTau/test/runDeepTauIDsOnMiniAOD.py
Expand Up @@ -10,20 +10,27 @@
minimalOutput = True
eventsToProcess = 100
nThreads = 1
phase2 = False

process = cms.Process('TauID')
process.load('Configuration.StandardSequences.MagneticField_cff')
process.load('Configuration.Geometry.GeometryRecoDB_cff')
process.load('Configuration.StandardSequences.FrontierConditions_GlobalTag_cff')
process.load('Configuration.StandardSequences.EndOfProcess_cff')

from Configuration.AlCa.GlobalTag import GlobalTag
process.GlobalTag = GlobalTag(process.GlobalTag, 'auto:phase1_2018_realistic', '')
if phase2:
process.load('Configuration.Geometry.GeometryExtended2026D88Reco_cff')
process.GlobalTag = GlobalTag(process.GlobalTag, 'auto:phase2_realistic_T25', '')
inputfile = '/store/mc/Phase2Spring21DRMiniAOD/TTbar_TuneCP5_14TeV-pythia8/MINIAODSIM/PU200Phase2D80_113X_mcRun4_realistic_T25_v1_ext1-v1/280000/04e6741c-489a-4fed-9e0c-d7703c274b5a.root'
else:
process.load('Configuration.Geometry.GeometryRecoDB_cff')
process.GlobalTag = GlobalTag(process.GlobalTag, 'auto:phase1_2018_realistic', '')
inputfile = '/store/mc/RunIISummer20UL18MiniAOD/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/MINIAODSIM/106X_upgrade2018_realistic_v11_L1v1-v2/00000/009636D7-07B2-DB49-882D-C251FD62CCE7.root'

# Input source
process.source = cms.Source('PoolSource', fileNames = cms.untracked.vstring(
# File from dataset TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8
'/store/mc/RunIISummer20UL18MiniAOD/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/MINIAODSIM/106X_upgrade2018_realistic_v11_L1v1-v2/00000/009636D7-07B2-DB49-882D-C251FD62CCE7.root'
inputfile
))

process.maxEvents = cms.untracked.PSet( input = cms.untracked.int32(eventsToProcess) )
Expand All @@ -38,6 +45,12 @@
# "DPFTau_2016_v1",
"againstEle2018",
]
if phase2:
toKeep = [ "newDMPhase2v1",
# "deepTau2018v2p5",
"deepTau2026v2p5",
"againstElePhase2v1",
]
tauIdEmbedder = tauIdConfig.TauIDEmbedder(process, debug = False,
updatedTauName = updatedTauName,
toKeep = toKeep)
Expand Down Expand Up @@ -67,6 +80,10 @@
process.out.outputCommands.append("keep *_"+updatedTauName+"_*_*")
process.out.outputCommands.append("keep *_"+updatedTauName+postfix+"_*_*")

# Adapt to old phase2 input samples where slimmedElectronsHGC are called slimmedElectronsFromMultiCl
if phase2:
process.mergedSlimmedElectronsForTauId.src = ["slimmedElectrons","slimmedElectronsFromMultiCl"]

# Path and EndPath definitions
process.p = cms.Path(
process.rerunMvaIsolationSequence *
Expand Down

0 comments on commit 1c0198a

Please sign in to comment.