Updated MLPF producer with ONNX #36841

Merged: 11 commits, Feb 11, 2022
@@ -391,7 +391,7 @@ def setup_(self, step, stepName, stepDict, k, properties):
if 'Reco' in step:
stepDict[stepName][k] = merge([self.step3, stepDict[step][k]])
def condition(self, fragment, stepList, key, hasHarvest):
return fragment=="TTbar_14TeV" and '2021' in key
return (fragment=="TTbar_14TeV" or fragment=="QCD_FlatPt_15_3000HS_14") and '2021PU' in key

upgradeWFs['mlpf'] = UpgradeWorkflow_mlpf(
steps = [
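For reference, the updated condition above can be read as a small standalone predicate. The sketch below is illustrative only (not part of the diff) and assumes nothing beyond the fragment names and workflow key strings shown in the hunk:

# Sketch of the updated MLPF workflow condition: the workflow now also covers the
# QCD flat-pt fragment and is restricted to 2021 pileup ("2021PU") workflow keys.
def mlpf_condition(fragment, key):
    return (fragment == "TTbar_14TeV" or fragment == "QCD_FlatPt_15_3000HS_14") and "2021PU" in key

assert mlpf_condition("TTbar_14TeV", "2021PU")
assert mlpf_condition("QCD_FlatPt_15_3000HS_14", "2021PU")
assert not mlpf_condition("TTbar_14TeV", "2021")  # no-PU 2021 keys are no longer selected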
@@ -81,8 +81,3 @@
'keep recoPFRecHits_particleFlowRecHitHGC__*',
'keep *_simPFProducer_*_*'])

from Configuration.ProcessModifiers.mlpf_cff import mlpf
from RecoParticleFlow.PFProducer.mlpf_EventContent_cff import MLPF_RECO

mlpf.toModify(RecoParticleFlowRECO,
outputCommands = RecoParticleFlowRECO.outputCommands + MLPF_RECO.outputCommands)
9 changes: 3 additions & 6 deletions RecoParticleFlow/Configuration/python/RecoParticleFlow_cff.py
@@ -102,14 +102,11 @@
e.toModify(pfNoPileUp, enable = cms.bool(False))
e.toModify(pfPileUp, enable = cms.bool(False))

#
# for MLPF

# for MLPF, replace standard PFAlgo with the ONNX-based MLPF producer
from Configuration.ProcessModifiers.mlpf_cff import mlpf
from RecoParticleFlow.PFProducer.mlpfProducer_cfi import mlpfProducer

_mlpfTask = cms.Task(mlpfProducer, particleFlowRecoTask.copy())

mlpf.toReplaceWith(particleFlowRecoTask, _mlpfTask)
mlpf.toReplaceWith(particleFlowTmp, mlpfProducer)

#
# switch from pfTICL to simPF
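As context for the toReplaceWith calls above: with the mlpf process modifier active, particleFlowTmp is swapped for the ONNX-based mlpfProducer; otherwise the standard PFAlgo path runs unchanged. A hedged sketch of activating the modifier in a configuration follows; the process label and the Run3 era are illustrative assumptions, not something this PR prescribes:

# Illustrative only: build a process with the mlpf modifier attached, so that
# mlpf.toReplaceWith(particleFlowTmp, mlpfProducer) takes effect during customisation.
import FWCore.ParameterSet.Config as cms
from Configuration.Eras.Era_Run3_cff import Run3
from Configuration.ProcessModifiers.mlpf_cff import mlpf

process = cms.Process("RECO", Run3, mlpf)  # era and process label are assumptions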
1 change: 1 addition & 0 deletions RecoParticleFlow/PFProducer/BuildFile.xml
@@ -16,6 +16,7 @@
<use name="boost"/>
<use name="clhep"/>
<use name="rootmath"/>
<use name="onnxruntime"/>
<export>
<lib name="1"/>
</export>
53 changes: 34 additions & 19 deletions RecoParticleFlow/PFProducer/interface/MLPFModel.h
@@ -7,31 +7,40 @@

namespace reco::mlpf {
//The model takes the following number of features for each input PFElement
static constexpr unsigned int NUM_ELEMENT_FEATURES = 15;
static constexpr unsigned int NUM_ELEMENT_FEATURES = 25;
static constexpr unsigned int NUM_OUTPUT_FEATURES = 14;

//these are defined at model creation time and set the random LSH codebook size
static constexpr int NUM_MAX_ELEMENTS_BATCH = 20000;
static constexpr int LSH_BIN_SIZE = 100;
static constexpr int LSH_BIN_SIZE = 64;
static constexpr int NUM_MAX_ELEMENTS_BATCH = 200 * LSH_BIN_SIZE;

//In CPU mode, we only want to evaluate each event separately
//In CPU mode, we want to evaluate each event separately
static constexpr int BATCH_SIZE = 1;

//The model has 12 outputs for each particle:
// out[0-7]: particle classification logits
// out[8]: regressed eta
// out[9]: regressed phi
// out[10]: regressed energy
// out[11]: regressed charge logit
static constexpr unsigned int NUM_OUTPUTS = 12;
static constexpr unsigned int NUM_CLASS = 7;
static constexpr unsigned int IDX_ETA = 8;
static constexpr unsigned int IDX_PHI = 9;
static constexpr unsigned int IDX_ENERGY = 10;
static constexpr unsigned int IDX_CHARGE = 11;
//The model has 14 outputs for each particle:
// out[0-7]: particle classification logits for each pdgId
// out[8]: regressed charge
// out[9]: regressed pt
// out[10]: regressed eta
// out[11]: regressed sin phi
// out[12]: regressed cos phi
// out[13]: regressed energy
static constexpr unsigned int IDX_CLASS = 7;

static constexpr unsigned int IDX_CHARGE = 8;

static constexpr unsigned int IDX_PT = 9;
static constexpr unsigned int IDX_ETA = 10;
static constexpr unsigned int IDX_SIN_PHI = 11;
static constexpr unsigned int IDX_COS_PHI = 12;
static constexpr unsigned int IDX_ENERGY = 13;

//for consistency with the baseline PFAlgo
static constexpr float PI_MASS = 0.13957;

//index [0, N_pdgids) -> PDGID
//this maps the absolute values of the predicted PDGIDs to an array of ascending indices
static const std::vector<int> pdgid_encoding = {0, 1, 2, 11, 13, 22, 130, 211};
static const std::vector<int> pdgid_encoding = {0, 211, 130, 1, 2, 22, 11, 13};

//PFElement::type -> index [0, N_types)
//this maps the type of the PFElement to an ascending index that is used by the model to distinguish between different elements
@@ -55,7 +64,13 @@ namespace reco::mlpf {

int argMax(std::vector<float> const& vec);

reco::PFCandidate makeCandidate(int pred_pid, int pred_charge, float pred_e, float pred_eta, float pred_phi);
reco::PFCandidate makeCandidate(int pred_pid,
int pred_charge,
float pred_pt,
float pred_eta,
float pred_sin_phi,
float pred_cos_phi,
float pred_e);

const std::vector<const reco::PFBlockElement*> getPFElements(const reco::PFBlockCollection& blocks);

@@ -64,4 +79,4 @@ namespace reco::mlpf {
size_t ielem_originator);
}; // namespace reco::mlpf

#endif
#endif
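To make the new 14-value output layout concrete, here is a small standalone decoding sketch (not part of the PR) based on the index constants and pdgid_encoding above; treating the first eight values as classification logits and recovering phi with atan2 are assumptions about how the raw outputs are meant to be interpreted:

import math

NUM_OUTPUT_FEATURES = 14
IDX_CLASS, IDX_CHARGE, IDX_PT, IDX_ETA = 7, 8, 9, 10
IDX_SIN_PHI, IDX_COS_PHI, IDX_ENERGY = 11, 12, 13
pdgid_encoding = [0, 211, 130, 1, 2, 22, 11, 13]  # class index -> absolute PDGID, 0 = no particle

def decode_element_output(row):
    """Decode one element's output row of length NUM_OUTPUT_FEATURES."""
    assert len(row) == NUM_OUTPUT_FEATURES
    logits = row[:IDX_CLASS + 1]
    pred_pid = pdgid_encoding[max(range(len(logits)), key=lambda i: logits[i])]  # argMax
    phi = math.atan2(row[IDX_SIN_PHI], row[IDX_COS_PHI])  # assumed phi reconstruction
    return {
        "pid": pred_pid,
        "charge": row[IDX_CHARGE],
        "pt": row[IDX_PT],
        "eta": row[IDX_ETA],
        "phi": phi,
        "energy": row[IDX_ENERGY],
    }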
2 changes: 1 addition & 1 deletion RecoParticleFlow/PFProducer/plugins/BuildFile.xml
@@ -80,6 +80,6 @@
<use name="RecoParticleFlow/PFProducer"/>
<use name="RecoParticleFlow/PFTracking"/>
<use name="RecoEcal/EgammaCoreTools"/>
<use name="PhysicsTools/TensorFlow"/>
<use name="PhysicsTools/ONNXRuntime"/>
<flags EDM_PLUGIN="1"/>
</library>
174 changes: 107 additions & 67 deletions RecoParticleFlow/PFProducer/plugins/MLPFProducer.cc
@@ -4,135 +4,175 @@
#include "FWCore/Framework/interface/MakerMacros.h"

#include "DataFormats/ParticleFlowCandidate/interface/PFCandidate.h"
#include "PhysicsTools/TensorFlow/interface/TensorFlow.h"
#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
#include "RecoParticleFlow/PFProducer/interface/MLPFModel.h"

struct MLPFCache {
const tensorflow::GraphDef* graph_def;
};
#include "DataFormats/ParticleFlowReco/interface/PFBlockElementTrack.h"

using namespace cms::Ort;

//use this to switch on detailed print statements in MLPF
//#define MLPF_DEBUG

class MLPFProducer : public edm::stream::EDProducer<edm::GlobalCache<MLPFCache> > {
class MLPFProducer : public edm::stream::EDProducer<edm::GlobalCache<ONNXRuntime>> {
public:
explicit MLPFProducer(const edm::ParameterSet&, const MLPFCache*);
explicit MLPFProducer(const edm::ParameterSet&, const ONNXRuntime*);

void produce(edm::Event& event, const edm::EventSetup& setup) override;
static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);

// static methods for handling the global cache
static std::unique_ptr<MLPFCache> initializeGlobalCache(const edm::ParameterSet&);
static void globalEndJob(MLPFCache*);
static std::unique_ptr<ONNXRuntime> initializeGlobalCache(const edm::ParameterSet&);
static void globalEndJob(const ONNXRuntime*);

private:
const edm::EDPutTokenT<reco::PFCandidateCollection> pfCandidatesPutToken_;
const edm::EDGetTokenT<reco::PFBlockCollection> inputTagBlocks_;
const std::string model_path_;
tensorflow::Session* session_;
};

MLPFProducer::MLPFProducer(const edm::ParameterSet& cfg, const MLPFCache* cache)
MLPFProducer::MLPFProducer(const edm::ParameterSet& cfg, const ONNXRuntime* cache)
: pfCandidatesPutToken_{produces<reco::PFCandidateCollection>()},
inputTagBlocks_(consumes<reco::PFBlockCollection>(cfg.getParameter<edm::InputTag>("src"))),
model_path_(cfg.getParameter<std::string>("model_path")) {
session_ = tensorflow::createSession(cache->graph_def);
}
inputTagBlocks_(consumes<reco::PFBlockCollection>(cfg.getParameter<edm::InputTag>("src"))) {}

void MLPFProducer::produce(edm::Event& event, const edm::EventSetup& setup) {
using namespace reco::mlpf;

const auto& blocks = event.get(inputTagBlocks_);
const auto& all_elements = getPFElements(blocks);

const long long int num_elements_total = all_elements.size();
std::vector<const reco::PFBlockElement*> selected_elements;
unsigned int num_elements_total = 0;
for (const auto* pelem : all_elements) {
if (pelem->type() == reco::PFBlockElement::PS1 || pelem->type() == reco::PFBlockElement::PS2) {
continue;
}
num_elements_total += 1;
selected_elements.push_back(pelem);
}
const auto tensor_size = LSH_BIN_SIZE * std::max(2u, (num_elements_total / LSH_BIN_SIZE + 1));

#ifdef MLPF_DEBUG
assert(num_elements_total < NUM_MAX_ELEMENTS_BATCH);
//tensor size must be a multiple of the bin size and larger than the number of elements
const auto tensor_size = LSH_BIN_SIZE * (num_elements_total / LSH_BIN_SIZE + 1);
assert(tensor_size <= NUM_MAX_ELEMENTS_BATCH);
assert(tensor_size % LSH_BIN_SIZE == 0);
#endif
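The padded tensor size computed above is the smallest multiple of LSH_BIN_SIZE that is at least two bins and large enough to hold all selected elements, rounded up to a full bin. A standalone worked example of the same arithmetic (illustrative, not part of the diff):

LSH_BIN_SIZE = 64
NUM_MAX_ELEMENTS_BATCH = 200 * LSH_BIN_SIZE  # 12800, as in MLPFModel.h

def padded_tensor_size(num_elements):
    # same formula as in the producer: at least 2 bins, rounded up to a whole bin
    return LSH_BIN_SIZE * max(2, num_elements // LSH_BIN_SIZE + 1)

print(padded_tensor_size(10))    # 128: small events still get two bins
print(padded_tensor_size(200))   # 256: 200 elements -> 4 bins of 64
print(padded_tensor_size(640))   # 704: an exact multiple still gains one extra bin
assert padded_tensor_size(640) % LSH_BIN_SIZE == 0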

//Create the input tensor
tensorflow::TensorShape shape({BATCH_SIZE, tensor_size, NUM_ELEMENT_FEATURES});
tensorflow::Tensor input(tensorflow::DT_FLOAT, shape);
input.flat<float>().setZero();
#ifdef MLPF_DEBUG
std::cout << "tensor_size=" << tensor_size << std::endl;
Review thread on the debug printout above:

Contributor: Is this needed, given the previous asserts? If it is really needed for debugging, then it might be better to place this cout before L55.

Contributor (author): True, it would perhaps be more informative (and fewer lines) to print this out before the assert while debugging. Can we address this in a follow-up version, or would you prefer a re-signing of this PR?

Contributor: OK, no need to request 4+1 signatures only to move one comment line; let's postpone it to a follow-up PR.

#endif

//Fill the input tensor
//Fill the input tensor (batch, elems, features) = (1, tensor_size, NUM_ELEMENT_FEATURES)
std::vector<std::vector<float>> inputs(1, std::vector<float>(NUM_ELEMENT_FEATURES * tensor_size, 0.0));
unsigned int ielem = 0;
for (const auto* pelem : all_elements) {
for (const auto* pelem : selected_elements) {
if (ielem > tensor_size) {
continue;
}

const auto& elem = *pelem;

//prepare the input array from the PFElement
const auto& props = getElementProperties(elem);

//copy features to the input array
for (unsigned int iprop = 0; iprop < NUM_ELEMENT_FEATURES; iprop++) {
input.tensor<float, 3>()(0, ielem, iprop) = normalize(props[iprop]);
inputs[0][ielem * NUM_ELEMENT_FEATURES + iprop] = normalize(props[iprop]);
}
ielem += 1;
}
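The fill above writes the (batch, element, feature) tensor in row-major flattened form, which is how the ONNXRuntime interface receives each input as a flat vector with the shape passed separately. A tiny standalone check of the indexing convention (illustrative only):

import numpy as np

NUM_ELEMENT_FEATURES = 25
tensor_size = 128  # example padded size (two bins of 64)

flat = np.zeros(tensor_size * NUM_ELEMENT_FEATURES, dtype=np.float32)
ielem, iprop, value = 3, 7, 1.5
flat[ielem * NUM_ELEMENT_FEATURES + iprop] = value  # flat index, as in the producer
assert flat.reshape(tensor_size, NUM_ELEMENT_FEATURES)[ielem, iprop] == value  # row-major equivalence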

//TF model input and output tensor names
const tensorflow::NamedTensorList input_list = {{"x:0", input}};
const std::vector<std::string> output_names = {"Identity:0"};

//Prepare the output tensor
std::vector<tensorflow::Tensor> outputs;

//run the GNN inference, given the inputs and the output.
//Note that the GNN enables information transfer between the input PFElements,
//such that the output ML-PFCandidates are in general combinations of the input PFElements, in the form of
//y_out = Adj.x_in, where x_in is input matrix (num_elem, NUM_ELEMENT_FEATURES), y_out is the output matrix (num_elem, NUM_OUTPUT_FEATURES)
//and Adj is an adjacency matrix between the elements that is constructed on the fly during model inference.
tensorflow::run(session_, input_list, output_names, &outputs);

//process the output tensor to ML-PFCandidates.
//The output can contain up to num_elem particles, with predicted PDGID=0 corresponding to no particles predicted.
const auto out_arr = outputs[0].tensor<float, 3>();
const auto& outputs = globalCache()->run({"x:0"}, inputs, {{1, tensor_size, NUM_ELEMENT_FEATURES}});
const auto& output = outputs[0];
#ifdef MLPF_DEBUG
assert(output.size() == tensor_size * NUM_OUTPUT_FEATURES);
#endif
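For debugging outside CMSSW, the exported model can also be exercised with the Python onnxruntime package. The sketch below is a hedged standalone example; the local file name is a placeholder, and it assumes the model exposes a single input named "x:0" of shape (batch, elements, 25) and one output of shape (batch, elements, 14), matching the run call above:

import numpy as np
import onnxruntime as ort

NUM_ELEMENT_FEATURES = 25
NUM_OUTPUT_FEATURES = 14
LSH_BIN_SIZE = 64

sess = ort.InferenceSession("mlpf.onnx")   # placeholder path to a local copy of the model

tensor_size = 2 * LSH_BIN_SIZE             # minimum padded size, as for a small event
x = np.zeros((1, tensor_size, NUM_ELEMENT_FEATURES), dtype=np.float32)
# ... fill x[0, ielem, :] with normalized element features here ...

(y,) = sess.run(None, {"x:0": x})          # None: fetch all model outputs
print(y.shape)                             # expected: (1, tensor_size, NUM_OUTPUT_FEATURES)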

std::vector<reco::PFCandidate> pOutputCandidateCollection;
for (unsigned int ielem = 0; ielem < all_elements.size(); ielem++) {
//get the coefficients in the output corresponding to the class probabilities (raw logits)
std::vector<float> pred_id_logits;
for (unsigned int idx_id = 0; idx_id <= NUM_CLASS; idx_id++) {
pred_id_logits.push_back(out_arr(0, ielem, idx_id));
for (size_t ielem = 0; ielem < num_elements_total; ielem++) {
std::vector<float> pred_id_probas(IDX_CLASS + 1, 0.0);
const reco::PFBlockElement* elem = selected_elements[ielem];

for (unsigned int idx_id = 0; idx_id <= IDX_CLASS; idx_id++) {
auto pred_proba = output[ielem * NUM_OUTPUT_FEATURES + idx_id];
#ifdef MLPF_DEBUG
assert(!std::isnan(pred_proba));
#endif
pred_id_probas[idx_id] = pred_proba;
}

auto imax = argMax(pred_id_probas);

//get the most probable class PDGID
int pred_pid = pdgid_encoding[argMax(pred_id_logits)];
int pred_pid = pdgid_encoding[imax];

//get the predicted momentum components
float pred_eta = out_arr(0, ielem, IDX_ETA);
float pred_phi = out_arr(0, ielem, IDX_PHI);
float pred_charge = out_arr(0, ielem, IDX_CHARGE);
float pred_e = out_arr(0, ielem, IDX_ENERGY);
#ifdef MLPF_DEBUG
std::cout << "ielem=" << ielem << " inputs:";
for (unsigned int iprop = 0; iprop < NUM_ELEMENT_FEATURES; iprop++) {
std::cout << iprop << "=" << inputs[0][ielem * NUM_ELEMENT_FEATURES + iprop] << " ";
}
std::cout << std::endl;
std::cout << "ielem=" << ielem << " pred: pid=" << pred_pid << std::endl;
#endif

//a particle was predicted for this PFElement, otherwise it was a spectator
if (pred_pid != 0) {
auto cand = makeCandidate(pred_pid, pred_charge, pred_e, pred_eta, pred_phi);
setCandidateRefs(cand, all_elements, ielem);
//muons and charged hadrons should only come from tracks, otherwise we won't have track references to pass downstream
if (((pred_pid == 13) || (pred_pid == 211)) && elem->type() != reco::PFBlockElement::TRACK) {
pred_pid = 130;
}

if (elem->type() == reco::PFBlockElement::TRACK) {
const auto* eltTrack = dynamic_cast<const reco::PFBlockElementTrack*>(elem);

//a track with no muon ref should not produce a muon candidate, instead we interpret it as a charged hadron
if (pred_pid == 13 && eltTrack->muonRef().isNull()) {
pred_pid = 211;
}

//tracks from displaced vertices need reference debugging downstream as well, so we just treat them as neutrals for the moment
if ((pred_pid == 211) && (eltTrack->isLinkedToDisplacedVertex())) {
pred_pid = 130;
}
}

//get the predicted momentum components
float pred_pt = output[ielem * NUM_OUTPUT_FEATURES + IDX_PT];
float pred_eta = output[ielem * NUM_OUTPUT_FEATURES + IDX_ETA];
float pred_sin_phi = output[ielem * NUM_OUTPUT_FEATURES + IDX_SIN_PHI];
float pred_cos_phi = output[ielem * NUM_OUTPUT_FEATURES + IDX_COS_PHI];
float pred_e = output[ielem * NUM_OUTPUT_FEATURES + IDX_ENERGY];
float pred_charge = output[ielem * NUM_OUTPUT_FEATURES + IDX_CHARGE];

auto cand = makeCandidate(pred_pid, pred_charge, pred_pt, pred_eta, pred_sin_phi, pred_cos_phi, pred_e);
setCandidateRefs(cand, selected_elements, ielem);
pOutputCandidateCollection.push_back(cand);

#ifdef MLPF_DEBUG
std::cout << "ielem=" << ielem << " cand: pid=" << cand.pdgId() << " E=" << cand.energy() << " pt=" << cand.pt()
<< " eta=" << cand.eta() << " phi=" << cand.phi() << " charge=" << cand.charge() << std::endl;
#endif
}
} //loop over PFElements

event.emplace(pfCandidatesPutToken_, pOutputCandidateCollection);
}
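The particle-type overrides applied inside the loop above can be summarized as one small standalone function (illustrative, not part of the diff); is_track, has_muon_ref and from_displaced_vertex stand in for the corresponding PFBlockElement and PFBlockElementTrack queries:

def apply_pid_overrides(pred_pid, is_track, has_muon_ref, from_displaced_vertex):
    # muons (13) and charged hadrons (211) must come from tracks, otherwise
    # there is no track reference to pass downstream: demote to neutral hadron
    if pred_pid in (13, 211) and not is_track:
        return 130
    if is_track:
        # a track without a muon ref cannot yield a muon candidate
        if pred_pid == 13 and not has_muon_ref:
            pred_pid = 211
        # displaced-vertex tracks are treated as neutral hadrons for now
        if pred_pid == 211 and from_displaced_vertex:
            pred_pid = 130
    return pred_pid

# example: a muon predicted on a non-track element becomes a neutral hadron
assert apply_pid_overrides(13, is_track=False, has_muon_ref=False, from_displaced_vertex=False) == 130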

std::unique_ptr<MLPFCache> MLPFProducer::initializeGlobalCache(const edm::ParameterSet& params) {
// this method is supposed to create, initialize and return a MLPFCache instance
std::unique_ptr<MLPFCache> cache = std::make_unique<MLPFCache>();

//load the frozen TF graph of the GNN model
std::string path = params.getParameter<std::string>("model_path");
auto fullPath = edm::FileInPath(path).fullPath();
LogDebug("MLPFProducer") << "Initializing MLPF model from " << fullPath;

cache->graph_def = tensorflow::loadGraphDef(fullPath);

return cache;
std::unique_ptr<ONNXRuntime> MLPFProducer::initializeGlobalCache(const edm::ParameterSet& params) {
return std::make_unique<ONNXRuntime>(params.getParameter<edm::FileInPath>("model_path").fullPath());
}

void MLPFProducer::globalEndJob(MLPFCache* cache) { delete cache->graph_def; }
void MLPFProducer::globalEndJob(const ONNXRuntime* cache) {}

void MLPFProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
desc.add<edm::InputTag>("src", edm::InputTag("particleFlowBlock"));
desc.add<std::string>("model_path", "RecoParticleFlow/PFProducer/data/mlpf/mlpf_2020_11_04.pb");
desc.add<edm::FileInPath>(
"model_path",
edm::FileInPath(
"RecoParticleFlow/PFProducer/data/mlpf/"
"mlpf_2021_11_16__no_einsum__all_data_cms-best-of-asha-scikit_20211026_042043_178263.workergpu010.onnx"));
descriptions.addWithDefaultLabel(desc);
}
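For reference, the description above corresponds roughly to the following generated cfi fragment. This is a sketch only: the real mlpfProducer_cfi is produced from fillDescriptions at build time, and loading it requires a CMSSW area where the data file is actually present.

import FWCore.ParameterSet.Config as cms

mlpfProducer = cms.EDProducer("MLPFProducer",
    src = cms.InputTag("particleFlowBlock"),
    model_path = cms.FileInPath(
        "RecoParticleFlow/PFProducer/data/mlpf/"
        "mlpf_2021_11_16__no_einsum__all_data_cms-best-of-asha-scikit_20211026_042043_178263.workergpu010.onnx"
    ),
)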

8 changes: 0 additions & 8 deletions RecoParticleFlow/PFProducer/python/mlpf_EventContent_cff.py

This file was deleted.