Skip to content

Commit

Permalink
Merge pull request #18315 from mverzett/DeepCSVPhaseI-fromCMSSW-91X
Browse files Browse the repository at this point in the history
Phase I Deep CSV Training
  • Loading branch information
davidlange6 committed Apr 23, 2017
2 parents 8d5a2e7 + c9793cb commit 21c2125
Show file tree
Hide file tree
Showing 12 changed files with 99 additions and 53 deletions.
14 changes: 7 additions & 7 deletions PhysicsTools/PatAlgos/python/patInputFiles_cff.py
Expand Up @@ -11,8 +11,8 @@
#, numberOfFiles = 1
#, useDAS = True
#)
'/store/relval/CMSSW_8_0_0/RelValTTbar_13/MINIAODSIM/PU25ns_80X_mcRun2_asymptotic_v4-v1/10000/A65CD249-BFDA-E511-813A-0025905A6066.root'
)
'/store/relval/CMSSW_9_1_0_pre2/RelValTTbar_13/MINIAODSIM/PU25ns_90X_mcRun2_asymptotic_v5-v1/00000/A868B9C9-2C1A-E711-ADC4-0025905B8604.root', #crashes patTuple_updateJets_fromMiniAOD_cfg.py [1]
)

# /RelValProdTTbar_13/CMSSW_8_0_0-80X_mcRun2_asymptotic_v4-v1/AODSIM
filesRelValProdTTbarAODSIM = cms.untracked.vstring(
Expand All @@ -24,7 +24,7 @@
#, numberOfFiles = 1
#, useDAS = True
#)
'/store/relval/CMSSW_8_0_0/RelValProdTTbar_13/AODSIM/80X_mcRun2_asymptotic_v4-v1/10000/DE81ABBF-1DDA-E511-8AF8-0026189438B5.root'
'/store/relval/CMSSW_9_1_0_pre2/RelValTTbar_13/GEN-SIM-RECO/90X_upgrade2017_realistic_v20-v2/00000/2257937F-3019-E711-BF48-0CC47A4D7678.root'
)

# /RelValTTbar_13/CMSSW_8_0_0-80X_mcRun2_asymptotic_v4-v1/GEN-SIM-RECO
Expand All @@ -37,7 +37,7 @@
#, numberOfFiles = 1
#, useDAS = True
#)
'/store/relval/CMSSW_8_0_0/RelValTTbar_13/GEN-SIM-RECO/80X_mcRun2_asymptotic_v4-v1/10000/1C687FB0-7BD9-E511-AFED-0CC47A78A4BA.root'
'/store/relval/CMSSW_9_1_0_pre2/RelValTTbar_13/GEN-SIM-RECO/90X_upgrade2017_realistic_v20-v2/00000/2257937F-3019-E711-BF48-0CC47A4D7678.root'
)

# /RelValTTbar_13/CMSSW_8_0_0-PU25ns_80X_mcRun2_asymptotic_v4_FastSim-v2/GEN-SIM-DIGI-RECO
Expand All @@ -50,7 +50,7 @@
#, numberOfFiles = 1
#, useDAS = True
#)
'/store/relval/CMSSW_8_0_0/RelValTTbar_13/GEN-SIM-DIGI-RECO/PU25ns_80X_mcRun2_asymptotic_v4_FastSim-v2/10000/00AADAB6-63DD-E511-8C34-002618943953.root'
'/store/relval/CMSSW_9_1_0_pre2/RelValTTbar_13/GEN-SIM-DIGI-RECO/PU25ns_90X_mcRun2_asymptotic_v5_FastSim-v1/00000/0039AD51-331A-E711-863C-0025905A60B4.root',
)

# /RelValTTbar_13/CMSSW_8_0_0-PU25ns_80X_mcRun2_asymptotic_v4-v1/GEN-SIM-RECO
Expand All @@ -63,7 +63,7 @@
#, numberOfFiles = 1
#, useDAS = True
#)
'/store/relval/CMSSW_8_0_0/RelValTTbar_13/GEN-SIM-RECO/PU25ns_80X_mcRun2_asymptotic_v4-v1/10000/42D6DF66-9DDA-E511-9200-0CC47A4D7670.root'
'/store/relval/CMSSW_9_1_0_pre2/RelValTTbar_13/GEN-SIM-RECO/PU25ns_90X_mcRun2_asymptotic_v5-v1/00000/E27EA6EC-2B1A-E711-84EC-0CC47A4D75EC.root',
)

# /SingleMu/CMSSW_8_0_0-80X_dataRun2_v5_RelVal_mu2012D-v3/MINIAOD
Expand All @@ -76,7 +76,7 @@
#, numberOfFiles = 1
#, useDAS = True
#)
'/store/relval/CMSSW_8_0_0/SingleMu/MINIAOD/80X_dataRun2_v5_RelVal_mu2012D-v3/10000/06A44F40-ECDD-E511-89D7-0CC47A78A3D8.root'
'/store/relval/CMSSW_9_1_0_pre2/SingleMuon/MINIAOD/90X_dataRun2_relval_v6_RelVal_sigMu2016E-v1/00000/96231232-361A-E711-96B5-0CC47A7C3430.root'
)

# /SingleMu/CMSSW_8_0_0-80X_dataRun2_v5_RelVal_mu2012D-v3/RECO
Expand Down
11 changes: 4 additions & 7 deletions PhysicsTools/PatAlgos/python/producersLayer1/jetProducer_cfi.py
Expand Up @@ -49,14 +49,11 @@
# CTagging
cms.InputTag('pfCombinedCvsLJetTags'),
cms.InputTag('pfCombinedCvsBJetTags'),
# The following code is commented-out to avoid breaking any unit test
# waiting for a set of AOD RelVals which have the jet tags in the event content
# DeepFlavour
# cms.InputTag('pfDeepCSVJetTags:probb'),
# cms.InputTag('pfDeepCSVJetTags:probc'),
# cms.InputTag('pfDeepCSVJetTags:probudsg'),
# cms.InputTag('pfDeepCSVJetTags:probbb'),
# cms.InputTag('pfDeepCSVJetTags:probcc'),
cms.InputTag('pfDeepCSVJetTags:probb'),
cms.InputTag('pfDeepCSVJetTags:probc'),
cms.InputTag('pfDeepCSVJetTags:probudsg'),
cms.InputTag('pfDeepCSVJetTags:probbb'),
# DeepCMVA
# cms.InputTag('pfDeepCMVAJetTags:probb'),
# cms.InputTag('pfDeepCMVAJetTags:probc'),
Expand Down
Expand Up @@ -63,7 +63,7 @@ def applySubstructure( process, postfix="" ) :
jetSource = cms.InputTag('ak8PFJetsPuppi'+postfix),
algo= 'AK', rParam = 0.8,
jetCorrections = ('AK8PFPuppi', cms.vstring(['L2Relative', 'L3Absolute']), 'None'),
btagDiscriminators = ([x.getModuleLabel() for x in patJetsDefault.discriminatorSources] + ['pfBoostedDoubleSecondaryVertexAK8BJetTags']),
btagDiscriminators = ([x.value() for x in patJetsDefault.discriminatorSources] + ['pfBoostedDoubleSecondaryVertexAK8BJetTags']),
genJetCollection = cms.InputTag('slimmedGenJetsAK8')
)
getattr(process,"patJetsAK8Puppi"+postfix).userData.userFloats.src = [] # start with empty list of user floats
Expand Down
6 changes: 3 additions & 3 deletions PhysicsTools/PatAlgos/python/tools/jetTools.py
Expand Up @@ -381,7 +381,7 @@ def setupBTagging(process, jetSource, pfCandidates, explicitJTA, pvSource, svSou
if btagInfo == 'pfDeepCMVATagInfos':
addToProcessAndTask(btagPrefix+btagInfo+labelName+postfix,
btag.pfDeepCMVATagInfos.clone(
pfDeepCSVTagInfos = cms.InputTag(btagPrefix+'pfDeepCSVTagInfos'+labelName+postfix),
deepNNTagInfos = cms.InputTag(btagPrefix+'pfDeepCSVTagInfos'+labelName+postfix),
ipInfoSrc = cms.InputTag(btagPrefix+"pfImpactParameterTagInfos"+labelName+postfix),
muInfoSrc = cms.InputTag(btagPrefix+"softPFMuonsTagInfos"+labelName+postfix),
elInfoSrc = cms.InputTag(btagPrefix+"softPFElectronsTagInfos"+labelName+postfix)),
Expand All @@ -391,7 +391,7 @@ def setupBTagging(process, jetSource, pfCandidates, explicitJTA, pvSource, svSou
if btagInfo == 'pfDeepCMVANegativeTagInfos':
addToProcessAndTask(btagPrefix+btagInfo+labelName+postfix,
btag.pfDeepCMVATagInfos.clone(
pfDeepCSVTagInfos = cms.InputTag(btagPrefix+'pfDeepCSVTagInfos'+labelName+postfix),
deepNNTagInfos = cms.InputTag(btagPrefix+'pfDeepCSVTagInfos'+labelName+postfix),
ipInfoSrc = cms.InputTag(btagPrefix+"pfImpactParameterTagInfos"+labelName+postfix),
muInfoSrc = cms.InputTag(btagPrefix+"softPFMuonsTagInfos"+labelName+postfix),
elInfoSrc = cms.InputTag(btagPrefix+"softPFElectronsTagInfos"+labelName+postfix)),
Expand All @@ -401,7 +401,7 @@ def setupBTagging(process, jetSource, pfCandidates, explicitJTA, pvSource, svSou
if btagInfo == 'pfDeepCMVAPositiveTagInfos':
addToProcessAndTask(btagPrefix+btagInfo+labelName+postfix,
btag.pfDeepCMVATagInfos.clone(
pfDeepCSVTagInfos = cms.InputTag(btagPrefix+'pfDeepCSVTagInfos'+labelName+postfix),
deepNNTagInfos = cms.InputTag(btagPrefix+'pfDeepCSVTagInfos'+labelName+postfix),
ipInfoSrc = cms.InputTag(btagPrefix+"pfImpactParameterTagInfos"+labelName+postfix),
muInfoSrc = cms.InputTag(btagPrefix+"softPFMuonsTagInfos"+labelName+postfix),
elInfoSrc = cms.InputTag(btagPrefix+"softPFElectronsTagInfos"+labelName+postfix)),
Expand Down
16 changes: 14 additions & 2 deletions PhysicsTools/PatAlgos/test/patTuple_addBTagging_cfg.py
Expand Up @@ -101,6 +101,17 @@
,'pfCombinedCvsBJetTags'
# ChargeTagging
,'pfChargeBJetTags'
#Deep Flavour
,'pfDeepCSVJetTags:probb'
,'pfDeepCSVJetTags:probc'
,'pfDeepCSVJetTags:probudsg'
,'pfDeepCSVJetTags:probbb'
# DeepCMVA
,'pfDeepCMVAJetTags:probb'
,'pfDeepCMVAJetTags:probc'
,'pfDeepCMVAJetTags:probudsg'
,'pfDeepCMVAJetTags:probbb'
,'pfDeepCMVAJetTags:probcc'
]

# uncomment the following lines to add ak4PFJets with new b-tags to your PAT output
Expand All @@ -126,18 +137,19 @@
process.patJetsAK8PFCHS.addTagInfos = True

# uncomment the following lines to add subjets of ak8PFJetsCHSSoftDrop with new b-tags to your PAT output
from pdb import set_trace
addJetCollection(
process,
labelName = 'AK8PFCHSSoftDropSubjets',
jetSource = cms.InputTag('ak8PFJetsCHSSoftDrop','SubJets'),
jetCorrections = ('AK4PFchs', cms.vstring(['L1FastJet', 'L2Relative', 'L3Absolute']), 'Type-2'), # Using AK4 JECs for subjets which might not be completely appropriate
algo = 'AK', # needed for subjet flavor clustering
rParam = 0.8, # needed for subjet flavor clustering
btagDiscriminators = btagDiscriminators,
explicitJTA = True, # needed for subjet b tagging
svClustering = True, # needed for subjet b tagging
fatJets = cms.InputTag("ak8PFJetsCHS"), # needed for subjet flavor clustering
groomedFatJets = cms.InputTag("ak8PFJetsCHSSoftDrop") # needed for subjet flavor clustering
groomedFatJets = cms.InputTag("ak8PFJetsCHSSoftDrop"), # needed for subjet flavor clustering
rParam = 0.8, # needed for subjet flavor clustering
)
process.patJetsAK8PFCHSSoftDropSubjets.addTagInfos = True

Expand Down
Expand Up @@ -50,8 +50,8 @@
updateJetCollection(
process,
labelName = 'SoftDropSubjets',
jetSource = cms.InputTag('slimmedJetsAK8PFCHSSoftDropPacked:SubJets'),
jetCorrections = ('AK4PFchs', cms.vstring(['L1FastJet', 'L2Relative', 'L3Absolute']), 'None'),
jetSource = cms.InputTag('slimmedJetsAK8PFPuppiSoftDropPacked:SubJets'),
jetCorrections = ('AK4PFPuppi', cms.vstring(['L2Relative', 'L3Absolute']), 'None'),
btagDiscriminators = ['pfCombinedSecondaryVertexV2BJetTags', 'pfCombinedInclusiveSecondaryVertexV2BJetTags'],
explicitJTA = True, # needed for subjet b tagging
svClustering = False, # needed for subjet b tagging (IMPORTANT: Needs to be set to False to disable ghost-association which does not work with slimmed jets)
Expand Down
44 changes: 37 additions & 7 deletions RecoBTag/Combined/plugins/DeepFlavourJetTagsProducer.cc
Expand Up @@ -42,6 +42,7 @@

#include <fstream>
#include <map>
#include <set>
#include <vector>
#include <string>
#include <iostream>
Expand Down Expand Up @@ -76,10 +77,13 @@ class DeepFlavourJetTagsProducer : public edm::stream::EDProducer<> {
// ----------member data ---------------------------
const edm::EDGetTokenT< INFOS > src_;
edm::FileInPath nnconfig_;
bool check_sv_for_defaults_;
bool mean_padding_;
lwt::LightweightNeuralNetwork *neural_network_;
lwt::ValueMap inputs_; //typedef of unordered_map<string, float>
vector<string> outputs_;
vector<MVAVar> variables_;
map<string, string> toadd_;
};

//
Expand All @@ -97,6 +101,8 @@ class DeepFlavourJetTagsProducer : public edm::stream::EDProducer<> {
DeepFlavourJetTagsProducer::DeepFlavourJetTagsProducer(const edm::ParameterSet& iConfig) :
src_( consumes< INFOS >(iConfig.getParameter<edm::InputTag>("src")) ),
nnconfig_(iConfig.getParameter<edm::FileInPath>("NNConfig")),
check_sv_for_defaults_(iConfig.getParameter<bool>("checkSVForDefaults")),
mean_padding_(iConfig.getParameter<bool>("meanPadding")),
neural_network_(NULL),
inputs_(),
outputs_(),
Expand All @@ -109,12 +115,27 @@ DeepFlavourJetTagsProducer::DeepFlavourJetTagsProducer(const edm::ParameterSet&
//create NN and store the output names for the future
neural_network_ = new lwt::LightweightNeuralNetwork(config.inputs, config.layers, config.outputs);
outputs_ = config.outputs;
set<string> outset(outputs_.begin(), outputs_.end());

//in case we want to merge some different outputs together
edm::ParameterSet toadd = iConfig.getParameter<edm::ParameterSet>("toAdd");
for(auto output : toadd.getParameterNamesForType<string>()) {
string target = toadd.getParameter<string>(output);
if(outset.find(output) == outset.end())
throw cms::Exception("RuntimeError") << "The required output: " << output << " to be added to " << target << " could not be found among the NN outputs" << endl;
if(outset.find(target) == outset.end())
throw cms::Exception("RuntimeError") << "The required output: " << target << ", target of addition of " << output << " could not be found among the NN outputs" << endl;
toadd_[output] = target;
}

//produce one output kind per node
//produce one output kind per node
for(auto outnode : config.outputs) {
produces<JetTagCollection>(outnode);
if(toadd_.find(outnode) == toadd_.end()){ //produce this output only if it is not added into another output
produces<JetTagCollection>(outnode);
}
}


//get the set-up for the inputs
for(auto& input : config.inputs) {
MVAVar var;
Expand All @@ -134,7 +155,8 @@ DeepFlavourJetTagsProducer::DeepFlavourJetTagsProducer(const edm::ParameterSet&
<< ". Please check the spelling" << std::endl;
}
var.index = (tokens.size() == 2) ? stoi(tokens.at(1)) : -1;
var.default_value = -1*input.offset; //set default to -offset so that when scaling (val+offset)*scale the outcome is 0
var.default_value = (mean_padding_) ? 0. : -1*input.offset; //set default to -offset so that when scaling (val+offset)*scale the outcome is 0
//for mean padding it is set to zero so that undefined values are assigned -mean/scale

variables_.push_back(var);
}
Expand Down Expand Up @@ -184,9 +206,11 @@ DeepFlavourJetTagsProducer::produce(edm::Event& iEvent, const edm::EventSetup& i
TaggingVariableList vars = info.taggingVariables();
//if there are no tracks there's no point in doing it
bool notracks = (vars.get(reco::btau::jetNSelectedTracks) == 0);
bool novtx = (vars.get(reco::btau::jetNSecondaryVertices) == 0);
bool defaulted = (check_sv_for_defaults_) ? (notracks && novtx) : notracks;
lwt::ValueMap nnout; //returned value

if(!notracks) {
if(!defaulted) {
for(auto& var : variables_) {
if(var.index >= 0){
std::vector<float> vals = vars.getList(var.id, false);
Expand All @@ -200,21 +224,27 @@ DeepFlavourJetTagsProducer::produce(edm::Event& iEvent, const edm::EventSetup& i

//compute NN output(s)
nnout = neural_network_->compute(inputs_);

//merge outputs
for(auto entry : toadd_) {
nnout[entry.second] += nnout[entry.first];
}
}

//get the map's key
edm::RefToBase<Jet> key = info.jet();

//dump the NN output(s)
for(size_t i=0; i<outputs_.size(); ++i) {
(*output_tags[i])[key] = (notracks) ? -1 : nnout[outputs_[i]];
//std::cout << i << ": " << nnout[outputs_[i]] << std::endl;
(*output_tags[i])[key] = (defaulted) ? -1 : nnout[outputs_[i]];
}
}

// put the output in the event
for(size_t i=0; i<outputs_.size(); ++i) {
iEvent.put(std::move(output_tags[i]), outputs_[i]);
if(toadd_.find(outputs_[i]) == toadd_.end()) {
iEvent.put(std::move(output_tags[i]), outputs_[i]);
}
}
}

Expand Down
6 changes: 3 additions & 3 deletions RecoBTag/Combined/python/deepFlavour_cff.py
@@ -1,6 +1,6 @@
import FWCore.ParameterSet.Config as cms
from RecoBTag.Combined.pfDeepCSVTagInfos_cfi import pfDeepCSVTagInfos
from RecoBTag.Combined.DeepCMVATagInfoProducer_cfi import pfDeepCMVATagInfos
from RecoBTag.Combined.pfDeepCMVATagInfos_cfi import pfDeepCMVATagInfos
from RecoBTag.Combined.pfDeepCSVJetTags_cfi import pfDeepCSVJetTags
from RecoBTag.Combined.pfDeepCMVAJetTags_cfi import pfDeepCMVAJetTags

Expand Down Expand Up @@ -52,8 +52,8 @@
##
pfDeepFlavourTask = cms.Task(
pfDeepCSVTagInfos,
## pfDeepCMVATagInfos, #SKIP for the moment
pfDeepCMVATagInfos, #SKIP for the moment
pfDeepCSVJetTags
## , pfDeepCMVAJetTags
, pfDeepCMVAJetTags
)
pfDeepFlavour = cms.Sequence(pfDeepFlavourTask)
6 changes: 5 additions & 1 deletion RecoBTag/Combined/python/pfDeepCMVAJetTags_cfi.py
Expand Up @@ -3,5 +3,9 @@
pfDeepCMVAJetTags = cms.EDProducer(
'DeepFlavourJetTagsProducer',
src = cms.InputTag('pfDeepCMVATagInfos'),
NNConfig = cms.FileInPath('RecoBTag/Combined/data/Model_DeepCMVA.json')
checkSVForDefaults = cms.bool(False),
meanPadding = cms.bool(False),
NNConfig = cms.FileInPath('RecoBTag/Combined/data/Model_DeepCMVA.json'),
toAdd = cms.PSet(
),
)
19 changes: 18 additions & 1 deletion RecoBTag/Combined/python/pfDeepCSVJetTags_cfi.py
Expand Up @@ -3,5 +3,22 @@
pfDeepCSVJetTags = cms.EDProducer(
'DeepFlavourJetTagsProducer',
src = cms.InputTag('pfDeepCSVTagInfos'),
NNConfig = cms.FileInPath('RecoBTag/Combined/data/DeepFlavourNoSL.json')
checkSVForDefaults = cms.bool(False),
meanPadding = cms.bool(False),
NNConfig = cms.FileInPath('RecoBTag/Combined/data/DeepFlavourNoSL.json'),
toAdd = cms.PSet(
probcc = cms.string('probc')
),
)

from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel
phase1Pixel.toModify(pfDeepCSVJetTags, NNConfig = cms.FileInPath('RecoBTag/Combined/data/DeepCSV_PhaseI.json'))
phase1Pixel.toModify(pfDeepCSVJetTags, checkSVForDefaults = cms.bool(True))
phase1Pixel.toModify(pfDeepCSVJetTags, meanPadding = cms.bool(True))
phase1Pixel.toModify(pfDeepCSVJetTags, toAdd = cms.PSet())

from Configuration.Eras.Modifier_phase2_common_cff import phase2_common
phase2_common.toModify(pfDeepCSVJetTags, NNConfig = cms.FileInPath('RecoBTag/Combined/data/DeepCSV_PhaseI.json'))
phase2_common.toModify(pfDeepCSVJetTags, checkSVForDefaults = cms.bool(True))
phase2_common.toModify(pfDeepCSVJetTags, meanPadding = cms.bool(True))
phase2_common.toModify(pfDeepCSVJetTags, toAdd = cms.PSet())
24 changes: 5 additions & 19 deletions RecoBTag/Combined/scripts/format_deepflavour_json.sh
@@ -1,23 +1,9 @@
#! /bin/env bash

sed -i 's|Jet_eta|jetEta|g' $1
sed -i 's|Jet_pt|jetPt|g' $1
#sed -i 's|jet_eta|jetEta|g' $1
sed -i 's|jet_eta|jetAbsEta|g' $1
sed -i 's|jet_pt|jetPt|g' $1
sed -i 's|TagVarCSV_||g' $1
sed -i 's|TagVarCSVTrk_||g' $1
sed -i 's|prob_|prob|g' $1

#bugfixes
sed -i 's|jetNTracks|jetNSelectedTracks|g' $1
sed -i 's|jetNSelectedTracksEtaRel|jetNTracksEtaRel|g' $1

python <<EOF
import json
with open('$1') as infile:
jmap = json.loads(infile.read())
for var in jmap['inputs']:
var['offset'] *= -1
var['scale'] = 1./var['scale']
with open('$1', 'w') as out:
out.write(json.dumps(jmap, indent=2, separators = (',', ': ')))
EOF
sed -i 's|trackJetDistVal|trackJetDist|g' $1

0 comments on commit 21c2125

Please sign in to comment.