Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduced radom number calls in the runSimple Method of HGCDigitizerBase for Potential Speedup. #27980

Merged
merged 7 commits into from Sep 24, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 12 additions & 1 deletion SimCalorimetry/HGCalSimProducers/interface/HGCDigitizerBase.h
Expand Up @@ -22,7 +22,7 @@
#include "Geometry/HcalTowerAlgo/interface/HcalGeometry.h"

#include "SimCalorimetry/HGCalSimAlgos/interface/HGCalSiNoiseMap.h"

#include "TRandom.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adas1994 why aren't you using the framework random number generator service?

namespace hgc = hgc_digi;

namespace hgc_digi_utils {
Expand Down Expand Up @@ -68,6 +68,10 @@ class HGCDigitizerBase {
@short CTOR
*/
HGCDigitizerBase(const edm::ParameterSet& ps);
/**
@short Gaussian Noise Generation Member Function
*/
void GenerateGaussianNoise(const double NoiseMean, const double NoiseStd);
/**
@short steer digitization mode
*/
Expand Down Expand Up @@ -150,6 +154,13 @@ class HGCDigitizerBase {

//if set to true, threshold will be computed based on the expected meap peak/2
bool thresholdFollowsMIP_;

// New NoiseArray Parameters
const double NoiseMean_, NoiseStd_;
static const size_t NoiseArrayLength_ = 50000;
static const size_t samplesize_ = 15;
std::array<std::array<double, samplesize_>, NoiseArrayLength_> GaussianNoiseArray_;
unsigned int SeedOffset_;
};

#endif
Expand Up @@ -6,7 +6,7 @@

#include "CLHEP/Random/RandGauss.h"
#include "CLHEP/Random/RandGaussQ.h"

#include "CLHEP/Random/RandFlat.h"
#include "SimCalorimetry/HGCalSimProducers/interface/HGCDigitizerTypes.h"

/**
Expand Down
4 changes: 4 additions & 0 deletions SimCalorimetry/HGCalSimProducers/python/hgcalDigitizer_cfi.py
Expand Up @@ -66,6 +66,7 @@
accumulatorType = cms.string("HGCDigiProducer"),
hitCollection = cms.string("HGCHitsEE"),
digiCollection = cms.string("HGCDigisEE"),
seedOffset = cms.uint32(9182),
maxSimHitsAccTime = cms.uint32(100),
bxTime = cms.double(25),
eVPerEleHolePair = cms.double(eV_per_eh_pair),
Expand Down Expand Up @@ -135,6 +136,7 @@
accumulatorType = cms.string("HGCDigiProducer"),
hitCollection = cms.string("HGCHitsHEfront"),
digiCollection = cms.string("HGCDigisHEfront"),
seedOffset = cms.uint32(8273),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since these parameters are no longer used, they can be deleted

maxSimHitsAccTime = cms.uint32(100),
bxTime = cms.double(25),
tofDelay = cms.double(5),
Expand Down Expand Up @@ -203,6 +205,7 @@
accumulatorType = cms.string("HGCDigiProducer"),
hitCollection = cms.string("HcalHits"),
digiCollection = cms.string("HGCDigisHEback"),
seedOffset = cms.uint32(7364),
maxSimHitsAccTime = cms.uint32(100),
bxTime = cms.double(25),
tofDelay = cms.double(1),
Expand Down Expand Up @@ -246,6 +249,7 @@
accumulatorType = cms.string("HGCDigiProducer"),
hitCollection = cms.string("HFNoseHits"),
digiCollection = cms.string("HFNoseDigis"),
seedOffset = cms.uint32(5647),
maxSimHitsAccTime = cms.uint32(100),
bxTime = cms.double(25),
eVPerEleHolePair = cms.double(eV_per_eh_pair),
Expand Down
26 changes: 21 additions & 5 deletions SimCalorimetry/HGCalSimProducers/src/HGCDigitizerBase.cc
Expand Up @@ -6,12 +6,13 @@ using namespace hgc_digi;
using namespace hgc_digi_utils;

template <class DFr>
HGCDigitizerBase<DFr>::HGCDigitizerBase(const edm::ParameterSet& ps) : scaleByDose_(false) {
HGCDigitizerBase<DFr>::HGCDigitizerBase(const edm::ParameterSet& ps)
: scaleByDose_(false), NoiseMean_(0.0), NoiseStd_(1.0) {
bxTime_ = ps.getParameter<double>("bxTime");
myCfg_ = ps.getParameter<edm::ParameterSet>("digiCfg");
SeedOffset_ = ps.getParameter<unsigned int>("seedOffset"),
doTimeSamples_ = myCfg_.getParameter<bool>("doTimeSamples");
thresholdFollowsMIP_ = myCfg_.getParameter<bool>("thresholdFollowsMIP");

if (myCfg_.exists("keV2fC"))
keV2fC_ = myCfg_.getParameter<double>("keV2fC");
else
Expand Down Expand Up @@ -51,6 +52,19 @@ HGCDigitizerBase<DFr>::HGCDigitizerBase(const edm::ParameterSet& ps) : scaleByDo
edm::ParameterSet feCfg = myCfg_.getParameter<edm::ParameterSet>("feCfg");
myFEelectronics_ = std::unique_ptr<HGCFEElectronics<DFr>>(new HGCFEElectronics<DFr>(feCfg));
myFEelectronics_->SetNoiseValues(noise_fC_);
GenerateGaussianNoise(NoiseMean_, NoiseStd_);
}

template <class DFr>
void HGCDigitizerBase<DFr>::GenerateGaussianNoise(const double NoiseMean, const double NoiseStd) {
unsigned int seed = 123456;
seed = seed + SeedOffset_;
TRandom trandom(seed);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adas1994 why not using the framework service based on the defined engine, i.e. move here CLHEP::RandGaussQ::shoot(engine, 0.0, noiseWidth) ? Furthermore TRandom looks the generator of poorest quality in the ROOT family according to the documentation The generator provided in TRandom itself is a LCG (Linear Congruential Generator), the BSD rand generator, that it should not be used because its period is only 2**31, i.e. approximatly 2 billion events, that can be generated in just few seconds.

Copy link
Contributor Author

@adas1994 adas1994 Sep 12, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

( @fabiocos )We were trying to see, if we can get to make HGCal simulate similar noise using random numbers from a much smaller pool of Gaussian random numbers and not generating a whole bunch of them for every channel for every event.

Consider this, at the end of the day, it is all just a bunch of Normal/Gaussian random numbers. And we are not generating billions of them so that they could cause problem due to its limited period.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adas1994 that has nothing to do with which pRNG should be used. Similar comments had been made on previous versions of this PR. Please use the CMSSW random number service exclusively.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kpedro88 @fabiocos The problem is that what needs to happen is that the noise collection needs to be generated in the constructor, before there are any streams to refer to. Here, the noise itself is generated with a fixed seed, so it is completely reproducible. The random number service is used later to choose noise values from this fixed array, so that is thread safe.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mdhildreth I imagine that what you need here is to have a predefined noise collection before the first event noise is produced, which does not necessarily mean within the constructor. I understand the rationale behind this strategy, but I also see risks:

  • is this collection big enough to avoid reusing too frequently the same noise and create artificial correlations?

  • although strictly speaking your noise is reproducible reusing the same configuration, the change of random numbers among jobs in production are managed through the framework service, nobody will manually randomize the starting seed offset in your configuration, so you will end up in each single job with exactly the same noise, which I doubt is what we want. The configurable offsets will be practically useless, and you might probably structure things so as the noise array is static since every instance will end up using the same.

Provided this is really the way you want to go I can imagine a strategy to initialise the noise array within runSimple at the first function call, to be done with care in a multi-thread safe way. That would be a clean way to rely on the random number service at least, although reusing the same library.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fabiocos The idea is to make the library big enough so that it doesn't matter. It's just generating low-level single-channel noise, over millions of channels, using a gaussian distribution. We should make it big enough so it can't be seen in physics validation plots, of course. In terms of the random seed variation: it doesn't matter if this is randomized if the collection is big enough. Each event gets an appropriate random entry into the library in a thread-safe way. This should be sufficient.

We can look at runSimple to see if this is a better way. Thanks.

for (size_t i = 0; i < NoiseArrayLength_; i++) {
for (size_t j = 0; j < samplesize_; j++) {
GaussianNoiseArray_[i][j] = trandom.Gaus(NoiseMean, NoiseStd);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in principle there is also the fireArray method that can be used

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have stored the random numbers generated by old method and hashed by our new method right before they are being used in runShaperToT and plotted a random sample from them to visualize any possible bias. I found these :
HGCDigisEE
HGCDigisHEfront
HFNoseDigis

.

}
}
}

template <class DFr>
Expand Down Expand Up @@ -87,7 +101,8 @@ void HGCDigitizerBase<DFr>::runSimple(std::unique_ptr<HGCDigitizerBase::DColl>&
HGCSimHitDataAccumulator::iterator it = simData.find(id);
HGCCellInfo& cell = (simData.end() == it ? zeroData : it->second);
addCellMetadata(cell, theGeom, id);

size_t hash_index = (CLHEP::RandFlat::shootInt(engine, (NoiseArrayLength_ - 1)) + id) % NoiseArrayLength_;
auto cellNoiseArray = GaussianNoiseArray_[hash_index];
//set the noise,cce, LSB and threshold to be used
float cce(1.f), noiseWidth(0.f), lsbADC(-1.f), maxADC(-1.f);
uint32_t thrADC(std::floor(myFEelectronics_->getTargetMipValue() / 2));
Expand Down Expand Up @@ -122,7 +137,7 @@ void HGCDigitizerBase<DFr>::runSimple(std::unique_ptr<HGCDigitizerBase::DColl>&
toa[i] = cell.hit_info[1][i] / rawCharge;

//final charge estimation
float noise = CLHEP::RandGaussQ::shoot(engine, 0.0, noiseWidth);
float noise = (float)cellNoiseArray[i] * noiseWidth;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it might be worth putting a switch here to go back to the old behavior, in case someone wants to run a test in the future.

float totalCharge(rawCharge * cce + noise);
if (totalCharge < 0.f)
totalCharge = 0.f;
Expand All @@ -131,7 +146,8 @@ void HGCDigitizerBase<DFr>::runSimple(std::unique_ptr<HGCDigitizerBase::DColl>&

//run the shaper to create a new data frame
DFr rawDataFrame(id);
myFEelectronics_->runShaper(rawDataFrame, chargeColl, toa, engine, thrADC, lsbADC, maxADC, cell.thickness);
int thickness = cell.thickness > 0 ? cell.thickness : 1;
myFEelectronics_->runShaper(rawDataFrame, chargeColl, toa, engine, thrADC, lsbADC, maxADC, thickness);

//update the output according to the final shape
updateOutput(coll, rawDataFrame);
Expand Down