New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve Various Patatrack kernels #35835
Changes from all commits
8a86782
2abf845
ec82c77
2f5a936
92aa2a4
df4460f
7b435d6
ff2a9e8
38b75aa
503fd78
ac23ced
ea55c81
3268720
58fd079
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,8 +18,8 @@ namespace gpuCalibPixel { | |
constexpr float VCaltoElectronOffset = -60; // L2-4: -60 +- 130 | ||
constexpr float VCaltoElectronOffset_L1 = -670; // L1: -670 +- 220 | ||
|
||
__global__ void calibDigis(bool isRun2, | ||
uint16_t* id, | ||
template <bool isRun2> | ||
__global__ void calibDigis(uint16_t* id, | ||
uint16_t const* __restrict__ x, | ||
uint16_t const* __restrict__ y, | ||
uint16_t* adc, | ||
|
@@ -42,9 +42,6 @@ namespace gpuCalibPixel { | |
if (invalidModuleId == id[i]) | ||
continue; | ||
|
||
float conversionFactor = (isRun2) ? (id[i] < 96 ? VCaltoElectronGain_L1 : VCaltoElectronGain) : 1.f; | ||
float offset = (isRun2) ? (id[i] < 96 ? VCaltoElectronOffset_L1 : VCaltoElectronOffset) : 0; | ||
|
||
bool isDeadColumn = false, isNoisyColumn = false; | ||
|
||
int row = x[i]; | ||
|
@@ -58,8 +55,13 @@ namespace gpuCalibPixel { | |
id[i] = invalidModuleId; | ||
adc[i] = 0; | ||
} else { | ||
float vcal = adc[i] * gain - pedestal * gain; | ||
adc[i] = std::max(100, int(vcal * conversionFactor + offset)); | ||
float vcal = float(adc[i]) * gain - pedestal * gain; | ||
if constexpr (isRun2) { | ||
float conversionFactor = id[i] < 96 ? VCaltoElectronGain_L1 : VCaltoElectronGain; | ||
float offset = id[i] < 96 ? VCaltoElectronOffset_L1 : VCaltoElectronOffset; | ||
vcal = vcal * conversionFactor + offset; | ||
} | ||
adc[i] = std::max(100, int(vcal)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cms-sw/trk-dpg-l2 would be nice if this 100 is taken from the same place as the other famous 100 in the regular clusterizer. |
||
} | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -135,6 +135,11 @@ namespace gpuClustering { | |
#ifdef __CUDA_ARCH__ | ||
// assume that we can cover the whole module with up to 16 blockDim.x-wide iterations | ||
constexpr int maxiter = 16; | ||
if (threadIdx.x == 0 && (hist.size() / blockDim.x) >= maxiter) | ||
printf("THIS IS NOT SUPPOSED TO HAPPEN too many hits in module %d: %d for block size %d\n", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is an assert below but it's compiled away! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. when we wiil have a Heterogeneous version, yes. |
||
thisModuleId, | ||
hist.size(), | ||
blockDim.x); | ||
#else | ||
auto maxiter = hist.size(); | ||
#endif | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,13 @@ | ||
#include "RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h" | ||
|
||
#include <mutex> | ||
|
||
namespace { | ||
// cuda atomics are NOT atomics on CPU so protect stat update with a mutex | ||
// waiting for a more general solution (incuding multiple devices) to be proposed and implemented | ||
std::mutex lock_stat; | ||
Comment on lines
+6
to
+8
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the cpu case, what is doing concurrent updates of the same stat ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do you mean gpu? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. on cpu (w/o mutex) was obviously producing wrong (lower) results (as expected) |
||
} // namespace | ||
|
||
template <> | ||
void CAHitNtupletGeneratorKernelsCPU::printCounters(Counters const *counters) { | ||
kernel_printCounters(counters); | ||
|
@@ -134,25 +142,12 @@ void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA * | |
if (nhits > 1 && params_.lateFishbone_) { | ||
gpuPixelDoublets::fishbone(hh.view(), device_theCells_.get(), device_nCells_, isOuterHitOfCell_, nhits, true); | ||
} | ||
|
||
if (params_.doStats_) { | ||
kernel_checkOverflows(tuples_d, | ||
device_tupleMultiplicity_.get(), | ||
device_hitToTuple_.get(), | ||
device_hitTuple_apc_, | ||
device_theCells_.get(), | ||
device_nCells_, | ||
device_theCellNeighbors_.get(), | ||
device_theCellTracks_.get(), | ||
isOuterHitOfCell_, | ||
nhits, | ||
params_.maxNumberOfDoublets_, | ||
counters_); | ||
} | ||
} | ||
|
||
template <> | ||
void CAHitNtupletGeneratorKernelsCPU::classifyTuples(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) { | ||
int32_t nhits = hh.nHits(); | ||
|
||
auto const *tuples_d = &tracks_d->hitIndices; | ||
auto *quality_d = tracks_d->qualityData(); | ||
|
||
|
@@ -209,8 +204,26 @@ void CAHitNtupletGeneratorKernelsCPU::classifyTuples(HitsOnCPU const &hh, TkSoA | |
device_hitToTuple_.get()); | ||
} | ||
} | ||
|
||
if (params_.doStats_) { | ||
std::lock_guard guard(lock_stat); | ||
kernel_checkOverflows(tuples_d, | ||
device_tupleMultiplicity_.get(), | ||
device_hitToTuple_.get(), | ||
device_hitTuple_apc_, | ||
device_theCells_.get(), | ||
device_nCells_, | ||
device_theCellNeighbors_.get(), | ||
device_theCellTracks_.get(), | ||
isOuterHitOfCell_, | ||
nhits, | ||
params_.maxNumberOfDoublets_, | ||
counters_); | ||
} | ||
|
||
if (params_.doStats_) { | ||
// counters (add flag???) | ||
std::lock_guard guard(lock_stat); | ||
Comment on lines
+222
to
+226
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need two separate scopes, instead of just one ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. historical |
||
kernel_doStatsForHitInTracks(device_hitToTuple_.get(), counters_); | ||
kernel_doStatsForTracks(tuples_d, quality_d, counters_); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -107,9 +107,9 @@ __global__ void kernel_checkOverflows(HitContainer const *foundNtuplets, | |
printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId()); | ||
if (thisCell.isKilled()) | ||
atomicAdd(&c.nKilledCells, 1); | ||
if (thisCell.unused()) | ||
if (!thisCell.unused()) | ||
atomicAdd(&c.nEmptyCells, 1); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I count now "used" because they are much less (so faster). Then in the report I do "1"-. |
||
if (0 == hitToTuple->size(thisCell.inner_hit_id()) && 0 == hitToTuple->size(thisCell.outer_hit_id())) | ||
if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id()))) | ||
atomicAdd(&c.nZeroTrackCells, 1); | ||
} | ||
|
||
|
@@ -896,7 +896,7 @@ __global__ void kernel_printCounters(cAHitNtupletGenerator::Counters const *coun | |
"||Counters | nEvents | nHits | nCells | nTuples | nFitTacks | nLooseTracks | nGoodTracks | nUsedHits | " | ||
"nDupHits | " | ||
"nKilledCells | " | ||
"nEmptyCells | nZeroTrackCells ||\n"); | ||
"nUsedCells | nZeroTrackCells ||\n"); | ||
printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n", | ||
c.nEvents, | ||
c.nHits, | ||
|
@@ -910,7 +910,7 @@ __global__ void kernel_printCounters(cAHitNtupletGenerator::Counters const *coun | |
c.nKilledCells, | ||
c.nEmptyCells, | ||
c.nZeroTrackCells); | ||
printf("Counters Norm %lld || %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.3f| %.3f||\n", | ||
printf("Counters Norm %lld || %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.3f| %.3f| %.3f||\n", | ||
c.nEvents, | ||
c.nHits / double(c.nEvents), | ||
c.nCells / double(c.nEvents), | ||
|
@@ -920,7 +920,7 @@ __global__ void kernel_printCounters(cAHitNtupletGenerator::Counters const *coun | |
c.nGoodTracks / double(c.nEvents), | ||
c.nUsedHits / double(c.nEvents), | ||
c.nDupHits / double(c.nEvents), | ||
c.nKilledCells / double(c.nEvents), | ||
c.nKilledCells / double(c.nCells), | ||
c.nEmptyCells / double(c.nCells), | ||
c.nZeroTrackCells / double(c.nCells)); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
96 ->
phase1PixelTopology::layerStart[1]
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a issue open to fix this everywhere (phase1PixelTopology::layerStart[1] may not compile, or compile and produce wrong results)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why not compile and/or produce wrong results? Can you point me to the issue, (I might have missed it)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
for record (thanks @VinInn ): #35370