Skip to content

Commit

Permalink
Merge pull request #34419 from VinInn/LinLogChi2CUT
Browse files Browse the repository at this point in the history
Lin log chi2 cut for patatrack
  • Loading branch information
cmsbuild committed Jul 18, 2021
2 parents eefe5ec + dcdaca5 commit b8709ea
Show file tree
Hide file tree
Showing 12 changed files with 75 additions and 52 deletions.
Expand Up @@ -71,7 +71,6 @@ class TrackingRecHit2DHeterogeneous {
};

using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits>;
using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits>;
using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous<cms::cudacompat::CPUTraits>;
using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous<cms::cudacompat::HostTraits>;

Expand Down Expand Up @@ -164,9 +163,4 @@ TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(
}
}

using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits>;
using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits>;
using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous<cms::cudacompat::CPUTraits>;
using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous<cms::cudacompat::HostTraits>;

#endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
Expand Up @@ -59,6 +59,8 @@ class TrackingRecHit2DSOAView {
m_chargeAndStatus[i] = ich;
}

__device__ __forceinline__ uint32_t charge(int i) const { return __ldg(m_chargeAndStatus + i) & 0xFFFFFF; }

__device__ __forceinline__ Status status(int i) const {
uint8_t w = __ldg(m_chargeAndStatus + i) >> 24;
return *reinterpret_cast<Status*>(&w);
Expand Down
Expand Up @@ -5,7 +5,7 @@
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"

template <>
cms::cuda::host::unique_ptr<float[]> TrackingRecHit2DCUDA::localCoordToHostAsync(cudaStream_t stream) const {
cms::cuda::host::unique_ptr<float[]> TrackingRecHit2DGPU::localCoordToHostAsync(cudaStream_t stream) const {
auto ret = cms::cuda::make_host_unique<float[]>(5 * nHits(), stream);
cms::cuda::copyAsync(ret, m_store32, 5 * nHits(), stream);
return ret;
Expand Down
Expand Up @@ -18,7 +18,7 @@ int main() {
auto nHits = 200;
// inner scope to deallocate memory before destroying the stream
{
TrackingRecHit2DCUDA tkhit(nHits, nullptr, nullptr, stream);
TrackingRecHit2DGPU tkhit(nHits, nullptr, nullptr, stream);

testTrackingRecHit2D::runKernels(tkhit.view());

Expand Down
12 changes: 6 additions & 6 deletions RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu
Expand Up @@ -33,13 +33,13 @@ namespace {

namespace pixelgpudetails {

TrackingRecHit2DCUDA PixelRecHitGPUKernel::makeHitsAsync(SiPixelDigisCUDA const& digis_d,
SiPixelClustersCUDA const& clusters_d,
BeamSpotCUDA const& bs_d,
pixelCPEforGPU::ParamsOnGPU const* cpeParams,
cudaStream_t stream) const {
TrackingRecHit2DGPU PixelRecHitGPUKernel::makeHitsAsync(SiPixelDigisCUDA const& digis_d,
SiPixelClustersCUDA const& clusters_d,
BeamSpotCUDA const& bs_d,
pixelCPEforGPU::ParamsOnGPU const* cpeParams,
cudaStream_t stream) const {
auto nHits = clusters_d.nClusters();
TrackingRecHit2DCUDA hits_d(nHits, cpeParams, clusters_d.clusModuleStart(), stream);
TrackingRecHit2DGPU hits_d(nHits, cpeParams, clusters_d.clusModuleStart(), stream);

int threadsPerBlock = 128;
int blocks = digis_d.nModules(); // active modules (with digis)
Expand Down
10 changes: 5 additions & 5 deletions RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h
Expand Up @@ -22,11 +22,11 @@ namespace pixelgpudetails {
PixelRecHitGPUKernel& operator=(const PixelRecHitGPUKernel&) = delete;
PixelRecHitGPUKernel& operator=(PixelRecHitGPUKernel&&) = delete;

TrackingRecHit2DCUDA makeHitsAsync(SiPixelDigisCUDA const& digis_d,
SiPixelClustersCUDA const& clusters_d,
BeamSpotCUDA const& bs_d,
pixelCPEforGPU::ParamsOnGPU const* cpeParams,
cudaStream_t stream) const;
TrackingRecHit2DGPU makeHitsAsync(SiPixelDigisCUDA const& digis_d,
SiPixelClustersCUDA const& clusters_d,
BeamSpotCUDA const& bs_d,
pixelCPEforGPU::ParamsOnGPU const* cpeParams,
cudaStream_t stream) const;
};
} // namespace pixelgpudetails

Expand Down
4 changes: 2 additions & 2 deletions RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc
Expand Up @@ -37,7 +37,7 @@ class SiPixelRecHitCUDA : public edm::global::EDProducer<> {
const edm::EDGetTokenT<cms::cuda::Product<BeamSpotCUDA>> tBeamSpot;
const edm::EDGetTokenT<cms::cuda::Product<SiPixelClustersCUDA>> token_;
const edm::EDGetTokenT<cms::cuda::Product<SiPixelDigisCUDA>> tokenDigi_;
const edm::EDPutTokenT<cms::cuda::Product<TrackingRecHit2DCUDA>> tokenHit_;
const edm::EDPutTokenT<cms::cuda::Product<TrackingRecHit2DGPU>> tokenHit_;

const pixelgpudetails::PixelRecHitGPUKernel gpuAlgo_;
};
Expand All @@ -47,7 +47,7 @@ SiPixelRecHitCUDA::SiPixelRecHitCUDA(const edm::ParameterSet& iConfig)
tBeamSpot(consumes<cms::cuda::Product<BeamSpotCUDA>>(iConfig.getParameter<edm::InputTag>("beamSpot"))),
token_(consumes<cms::cuda::Product<SiPixelClustersCUDA>>(iConfig.getParameter<edm::InputTag>("src"))),
tokenDigi_(consumes<cms::cuda::Product<SiPixelDigisCUDA>>(iConfig.getParameter<edm::InputTag>("src"))),
tokenHit_(produces<cms::cuda::Product<TrackingRecHit2DCUDA>>()) {}
tokenHit_(produces<cms::cuda::Product<TrackingRecHit2DGPU>>()) {}

void SiPixelRecHitCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
Expand Down
10 changes: 5 additions & 5 deletions RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromCUDA.cc
Expand Up @@ -40,9 +40,9 @@ class SiPixelRecHitFromCUDA : public edm::stream::EDProducer<edm::ExternalWork>
void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override;

const edm::ESGetToken<TrackerGeometry, TrackerDigiGeometryRecord> geomToken_;
const edm::EDGetTokenT<cms::cuda::Product<TrackingRecHit2DCUDA>> hitsToken_; // CUDA hits
const edm::EDGetTokenT<SiPixelClusterCollectionNew> clusterToken_; // legacy clusters
const edm::EDPutTokenT<SiPixelRecHitCollection> rechitsPutToken_; // legacy rechits
const edm::EDGetTokenT<cms::cuda::Product<TrackingRecHit2DGPU>> hitsToken_; // CUDA hits
const edm::EDGetTokenT<SiPixelClusterCollectionNew> clusterToken_; // legacy clusters
const edm::EDPutTokenT<SiPixelRecHitCollection> rechitsPutToken_; // legacy rechits
const edm::EDPutTokenT<HMSstorage> hostPutToken_;

uint32_t nHits_;
Expand All @@ -53,7 +53,7 @@ class SiPixelRecHitFromCUDA : public edm::stream::EDProducer<edm::ExternalWork>
SiPixelRecHitFromCUDA::SiPixelRecHitFromCUDA(const edm::ParameterSet& iConfig)
: geomToken_(esConsumes()),
hitsToken_(
consumes<cms::cuda::Product<TrackingRecHit2DCUDA>>(iConfig.getParameter<edm::InputTag>("pixelRecHitSrc"))),
consumes<cms::cuda::Product<TrackingRecHit2DGPU>>(iConfig.getParameter<edm::InputTag>("pixelRecHitSrc"))),
clusterToken_(consumes<SiPixelClusterCollectionNew>(iConfig.getParameter<edm::InputTag>("src"))),
rechitsPutToken_(produces<SiPixelRecHitCollection>()),
hostPutToken_(produces<HMSstorage>()) {}
Expand All @@ -68,7 +68,7 @@ void SiPixelRecHitFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& des
void SiPixelRecHitFromCUDA::acquire(edm::Event const& iEvent,
edm::EventSetup const& iSetup,
edm::WaitingTaskWithArenaHolder waitingTaskHolder) {
cms::cuda::Product<TrackingRecHit2DCUDA> const& inputDataWrapped = iEvent.get(hitsToken_);
cms::cuda::Product<TrackingRecHit2DGPU> const& inputDataWrapped = iEvent.get(hitsToken_);
cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)};
auto const& inputData = ctx.get(inputDataWrapped);

Expand Down
20 changes: 14 additions & 6 deletions RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc
Expand Up @@ -83,6 +83,11 @@ const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cudaStream_t
}

void PixelCPEFast::fillParamsForGpu() {
//
// this code executes only once per job, computation inefficiency is not an issue
// many code blocks are repeated: better keep the computation local and self oconsistent as blocks may in future move around, be deleted ...
// It is valid only for Phase1 and the version of GenError in DB used in late 2018 and in 2021

commonParamsGPU_.theThicknessB = m_DetParams.front().theThickness;
commonParamsGPU_.theThicknessE = m_DetParams.back().theThickness;
commonParamsGPU_.thePitchX = m_DetParams[0].thePitchX;
Expand Down Expand Up @@ -115,7 +120,8 @@ void PixelCPEFast::fillParamsForGpu() {
g.layer = ttopo_.layer(p.theDet->geographicalId());
g.index = i; // better be!
g.rawId = p.theDet->geographicalId();
assert((g.isBarrel ? commonParamsGPU_.theThicknessB : commonParamsGPU_.theThicknessE) == p.theThickness);
auto thickness = g.isBarrel ? commonParamsGPU_.theThicknessB : commonParamsGPU_.theThicknessE;
assert(thickness == p.theThickness);

auto ladder = ttopo_.pxbLadder(p.theDet->geographicalId());
if (oldLayer != g.layer) {
Expand Down Expand Up @@ -215,7 +221,7 @@ void PixelCPEFast::fillParamsForGpu() {
}
#ifdef EDM_ML_DEBUG
// sample yerr as function of position
auto const yoff = -54 * 4.f * commonParamsGPU_.thePitchY;
auto const yoff = -54.f * 4.f * commonParamsGPU_.thePitchY;
for (int ix = 0; ix < 16; ++ix) {
auto y = yoff * (1.f - (0.5f + float(ix)) / 8.f);
auto gvx = p.theOrigin.x() + 40.f * commonParamsGPU_.thePitchY;
Expand All @@ -235,7 +241,7 @@ void PixelCPEFast::fillParamsForGpu() {
auto gvz = 1.f / p.theOrigin.z();
//--- Note that the normalization is not required as only the ratio used

// calculate angles
// calculate angles (fed into errorFromTemplates)
cp.cotalpha = gvx * gvz;
cp.cotbeta = gvy * gvz;
auto aveCB = cp.cotbeta;
Expand Down Expand Up @@ -270,23 +276,25 @@ void PixelCPEFast::fillParamsForGpu() {
g.xfact[kk] *= detx;
g.yfact[kk] *= dety;
}
// sample y in angle
// sample y in "angle" (estimated from cluster size)
float ys = 8.f - 4.f; // apperent bias of half pixel (see plot)
// sample yerr as function of "size"
for (int iy = 0; iy < 16; ++iy) {
ys += 1.f; // first bin 0 is for size 9 (and size is in fixed point 2^3)
if (15 == iy)
ys += 8.f; // last bin for "overflow"
// cp.cotalpha = ys*100.f/(8.f*285.f);
cp.cotbeta = std::copysign(ys * 150.f / (8.f * 285.f), aveCB);
// cp.cotalpha = ys*(commonParamsGPU_.thePitchX/(8.f*thickness)); // use this to print sampling in "x" (and comment the line below)
cp.cotbeta = std::copysign(ys * (commonParamsGPU_.thePitchY / (8.f * thickness)), aveCB);
errorFromTemplates(p, cp, 20000.f);
g.sigmay[iy] = toMicron(cp.sigmay);
LogDebug("PixelCPEFast") << "sigmax/sigmay " << i << ' ' << (ys + 4.f) / 8.f << ' ' << cp.cotalpha << '/'
<< cp.cotbeta << ' ' << 10000.f * cp.sigmax << '/' << int(g.sigmay[iy]) << std::endl;
}
} // loop over det

//
// compute ladder baricenter (only in global z) for the barrel
//
auto& aveGeom = averageGeometry_;
int il = 0;
for (int im = 0, nm = phase1PixelTopology::numberOfModulesInBarrel; im < nm; ++im) {
Expand Down
Expand Up @@ -22,7 +22,7 @@
#include "gpuPixelDoublets.h"

using HitsOnGPU = TrackingRecHit2DSOAView;
using HitsOnCPU = TrackingRecHit2DCUDA;
using HitsOnCPU = TrackingRecHit2DGPU;

using HitToTuple = caConstants::HitToTuple;
using TupleMultiplicity = caConstants::TupleMultiplicity;
Expand Down Expand Up @@ -448,20 +448,38 @@ __global__ void kernel_classifyTracks(HitContainer const *__restrict__ tuples,

// compute a pT-dependent chi2 cut

auto roughLog = [](float x) {
// max diff [0.5,12] at 1.25 0.16143
// average diff 0.0662998
union IF {
uint32_t i;
float f;
};
IF z;
z.f = x;
uint32_t lsb = 1 < 21;
z.i += lsb;
z.i >>= 21;
auto f = z.i & 3;
int ex = int(z.i >> 2) - 127;

// log2(1+0.25*f)
// averaged over bins
const float frac[4] = {0.160497f, 0.452172f, 0.694562f, 0.901964f};
return float(ex) + frac[f];
};

// (see CAHitNtupletGeneratorGPU.cc)
//float pt = std::min<float>(tracks->pt(it), cuts.chi2MaxPt);
//float chi2Cut = cuts.chi2Scale *
// (cuts.chi2Coeff[0] + pt * (cuts.chi2Coeff[1] + pt * (cuts.chi2Coeff[2] + pt * cuts.chi2Coeff[3])));
float chi2Cut = cuts.chi2Scale;
// above number were for Quads not normalized so for the time being just multiple by ndof for Quads (triplets to be understood)
if (3.f * tracks->chi2(it) >= chi2Cut) {
float pt = std::min<float>(tracks->pt(it), cuts.chi2MaxPt);
float chi2Cut = cuts.chi2Scale * (cuts.chi2Coeff[0] + roughLog(pt) * cuts.chi2Coeff[1]);
if (tracks->chi2(it) >= chi2Cut) {
#ifdef NTUPLE_FIT_DEBUG
printf("Bad fit %d size %d pt %f eta %f chi2 %f\n",
printf("Bad chi2 %d size %d pt %f eta %f chi2 %f\n",
it,
tuples->size(it),
tracks->pt(it),
tracks->eta(it),
3.f * tracks->chi2(it));
tracks->chi2(it));
#endif
continue;
}
Expand Down
Expand Up @@ -31,15 +31,17 @@ namespace {

cAHitNtupletGenerator::QualityCuts makeQualityCuts(edm::ParameterSet const& pset) {
auto coeff = pset.getParameter<std::vector<double>>("chi2Coeff");
if (coeff.size() != 4) {
auto ptMax = pset.getParameter<double>("chi2MaxPt");
if (coeff.size() != 2) {
throw edm::Exception(edm::errors::Configuration,
"CAHitNtupletGeneratorOnGPU.trackQualityCuts.chi2Coeff must have 4 elements");
"CAHitNtupletGeneratorOnGPU.trackQualityCuts.chi2Coeff must have 2 elements");
}
coeff[1] = (coeff[1] - coeff[0]) / log2(ptMax);
return cAHitNtupletGenerator::QualityCuts{// polynomial coefficients for the pT-dependent chi2 cut
{(float)coeff[0], (float)coeff[1], (float)coeff[2], (float)coeff[3]},
{(float)coeff[0], (float)coeff[1], 0.f, 0.f},
// max pT used to determine the chi2 cut
(float)pset.getParameter<double>("chi2MaxPt"),
// chi2 scale factor: 30 for broken line fit, 45 for Riemann fit
(float)ptMax,
// chi2 scale factor: 8 for broken line fit, ?? for Riemann fit
(float)pset.getParameter<double>("chi2Scale"),
// regional cuts for triplets
{(float)pset.getParameter<double>("tripletMaxTip"),
Expand Down Expand Up @@ -161,11 +163,10 @@ void CAHitNtupletGeneratorOnGPU::fillDescriptions(edm::ParameterSetDescription&

edm::ParameterSetDescription trackQualityCuts;
trackQualityCuts.add<double>("chi2MaxPt", 10.)->setComment("max pT used to determine the pT-dependent chi2 cut");
trackQualityCuts.add<std::vector<double>>("chi2Coeff", {1., 0., 0., 0.})
->setComment("Polynomial coefficients to derive the pT-dependent chi2 cut");
trackQualityCuts.add<double>("chi2Scale", 25.)
trackQualityCuts.add<std::vector<double>>("chi2Coeff", {0.9, 1.8})->setComment("chi2 at 1GeV and at ptMax above");
trackQualityCuts.add<double>("chi2Scale", 8.)
->setComment(
"Factor to multiply the pT-dependent chi2 cut (currently: 25 for the broken line fit, - for the Riemann "
"Factor to multiply the pT-dependent chi2 cut (currently: 8 for the broken line fit, ?? for the Riemann "
"fit)");
trackQualityCuts.add<double>("tripletMinPt", 0.5)->setComment("Min pT for triplets, in GeV");
trackQualityCuts.add<double>("tripletMaxTip", 0.3)->setComment("Max |Tip| for triplets, in cm");
Expand All @@ -179,7 +180,7 @@ void CAHitNtupletGeneratorOnGPU::fillDescriptions(edm::ParameterSetDescription&
"cuts\" based on the fit results (pT, Tip, Zip).");
}

PixelTrackHeterogeneous CAHitNtupletGeneratorOnGPU::makeTuplesAsync(TrackingRecHit2DCUDA const& hits_d,
PixelTrackHeterogeneous CAHitNtupletGeneratorOnGPU::makeTuplesAsync(TrackingRecHit2DGPU const& hits_d,
float bfield,
cudaStream_t stream) const {
PixelTrackHeterogeneous tracks(cms::cuda::make_device_unique<pixelTrack::TrackSoA>(stream));
Expand Down
Expand Up @@ -24,7 +24,7 @@ namespace edm {
class CAHitNtupletGeneratorOnGPU {
public:
using HitsOnGPU = TrackingRecHit2DSOAView;
using HitsOnCPU = TrackingRecHit2DCUDA;
using HitsOnCPU = TrackingRecHit2DGPU;
using hindex_type = TrackingRecHit2DSOAView::hindex_type;

using Quality = pixelTrack::Quality;
Expand Down

0 comments on commit b8709ea

Please sign in to comment.