// SiPixelRawToClusterGPUKernel.h — forked from cms-sw/cmssw (223 lines, 183 loc, 8.03 KB).
#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h
#define RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h
#include <algorithm>
#include <cuda_runtime.h>
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h"
#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
#include "FWCore/Utilities/interface/typedefs.h"
#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"
#include "DataFormats/SiPixelDigi/interface/PixelErrors.h"
struct SiPixelFedCablingMapGPU;
class SiPixelGainForHLTonGPU;
namespace pixelgpudetails {
// Phase 1 geometry constants
//
// Bit positions and widths of the sub-detector fields packed into a single
// 32-bit geometry word (layer/ladder/module and disk/blade/panel groups),
// together with the masks used to extract each field after shifting.
const uint32_t layerStartBit = 20;
const uint32_t ladderStartBit = 12;
const uint32_t moduleStartBit = 2;
const uint32_t panelStartBit = 10;
const uint32_t diskStartBit = 18;
const uint32_t bladeStartBit = 12;
// Extraction masks matching the field widths implied by the start bits above.
const uint32_t layerMask = 0xF;
const uint32_t ladderMask = 0xFF;
const uint32_t moduleMask = 0x3FF;
const uint32_t panelMask = 0x3;
const uint32_t diskMask = 0xF;
const uint32_t bladeMask = 0x3F;
// Field widths (in bits) of a standard 32-bit pixel raw-data word:
// [LINK | ROC | DCOL | PXID | ADC], least significant field first.
const uint32_t LINK_bits = 6;
const uint32_t ROC_bits = 5;
const uint32_t DCOL_bits = 5;
const uint32_t PXID_bits = 8;
const uint32_t ADC_bits = 8;
// special for layer 1
// Layer-1 ROCs use a row/column addressing instead of dcol/pxid:
// [LINK | ROC | COL | ROW | ADC].
const uint32_t LINK_bits_l1 = 6;
const uint32_t ROC_bits_l1 = 5;
const uint32_t COL_bits_l1 = 6;
const uint32_t ROW_bits_l1 = 7;
const uint32_t OMIT_ERR_bits = 1;
// maxROCIndex: ROC indices at or above this value are treated as out of range.
const uint32_t maxROCIndex = 8;
// Pixel matrix dimensions of a single readout chip.
const uint32_t numRowsInRoc = 80;
const uint32_t numColsInRoc = 52;
// Maximum number of 32-bit words processed per FED buffer.
const uint32_t MAX_WORD = 2000;
// Cumulative shifts for the standard word layout, built from the widths above.
const uint32_t ADC_shift = 0;
const uint32_t PXID_shift = ADC_shift + ADC_bits;
const uint32_t DCOL_shift = PXID_shift + PXID_bits;
const uint32_t ROC_shift = DCOL_shift + DCOL_bits;
// NOTE(review): LINK_shift is derived from ROC_bits_l1 rather than ROC_bits;
// the two widths are equal (5), so the standard and layer-1 layouts share the
// same link position — confirm this stays true if either width changes.
const uint32_t LINK_shift = ROC_shift + ROC_bits_l1;
// special for layer 1 ROC
const uint32_t ROW_shift = ADC_shift + ADC_bits;
const uint32_t COL_shift = ROW_shift + ROW_bits_l1;
const uint32_t OMIT_ERR_shift = 20;
// Masks derived from the widths via ~(~0u << bits), i.e. the lowest `bits`
// bits set. The LINK/ROC masks use the layer-1 widths (identical in value to
// the standard widths above).
const uint32_t LINK_mask = ~(~uint32_t(0) << LINK_bits_l1);
const uint32_t ROC_mask = ~(~uint32_t(0) << ROC_bits_l1);
const uint32_t COL_mask = ~(~uint32_t(0) << COL_bits_l1);
const uint32_t ROW_mask = ~(~uint32_t(0) << ROW_bits_l1);
const uint32_t DCOL_mask = ~(~uint32_t(0) << DCOL_bits);
const uint32_t PXID_mask = ~(~uint32_t(0) << PXID_bits);
const uint32_t ADC_mask = ~(~uint32_t(0) << ADC_bits);
const uint32_t ERROR_mask = ~(~uint32_t(0) << ROC_bits_l1);
const uint32_t OMIT_ERR_mask = ~(~uint32_t(0) << OMIT_ERR_bits);
// Identity of one readout chip as decoded from the cabling map:
// the detector's raw id, the ROC index within that detector, and the
// module index used on the GPU side.
struct DetIdGPU {
uint32_t RawId;
uint32_t rocInDet;
uint32_t moduleId;
};
// A single pixel coordinate (local row/column within a module).
struct Pixel {
uint32_t row;
uint32_t col;
};
// Describes how a digi's (row, column, time, adc) fields are packed into a
// single 32-bit word: field widths, shifts, extraction masks, and the maximum
// representable value per field. All members are computed once in the
// constexpr constructor from the four field widths.
class Packing {
public:
using PackedDigiType = uint32_t;
// Constructor: pre-computes masks and shifts from field widths
// (each mask is the lowest `width` bits set; shifts accumulate in
// row -> column -> time -> adc order, starting at bit 0).
__host__ __device__ inline constexpr Packing(unsigned int row_w,
unsigned int column_w,
unsigned int time_w,
unsigned int adc_w)
: row_width(row_w),
column_width(column_w),
adc_width(adc_w),
row_shift(0),
column_shift(row_shift + row_w),
time_shift(column_shift + column_w),
adc_shift(time_shift + time_w),
row_mask(~(~0U << row_w)),
column_mask(~(~0U << column_w)),
time_mask(~(~0U << time_w)),
adc_mask(~(~0U << adc_w)),
rowcol_mask(~(~0U << (column_w + row_w))),
max_row(row_mask),
max_column(column_mask),
max_adc(adc_mask) {}
// Field widths. NOTE(review): there is no time_width member even though a
// time_w parameter exists — only the time shift/mask are stored.
uint32_t row_width;
uint32_t column_width;
uint32_t adc_width;
// Bit offset of each field within the packed word.
uint32_t row_shift;
uint32_t column_shift;
uint32_t time_shift;
uint32_t adc_shift;
// Unshifted masks: apply after shifting the word right by the field's shift.
PackedDigiType row_mask;
PackedDigiType column_mask;
PackedDigiType time_mask;
PackedDigiType adc_mask;
// Covers the row and column fields together.
PackedDigiType rowcol_mask;
// Largest value each field can hold (equal to its mask).
uint32_t max_row;
uint32_t max_column;
uint32_t max_adc;
};
// Packing layout used for pixel digis: 11-bit row, 11-bit column,
// no time field, 10-bit ADC.
__host__ __device__ inline constexpr Packing packing() {
return Packing(11, 11, 0, 10);
}
// Pack (row, col, adc) into one 32-bit digi word. The ADC value is clamped
// to the largest value its field can hold; row and col are expected to
// already fit their fields.
__host__ __device__ inline uint32_t pack(uint32_t row, uint32_t col, uint32_t adc) {
constexpr Packing fields = packing();
const uint32_t clampedAdc = std::min(adc, fields.max_adc);
uint32_t word = row << fields.row_shift;
word |= col << fields.column_shift;
word |= clampedAdc << fields.adc_shift;
return word;
}
// Flatten a (row, col) pixel coordinate into a single channel number:
// the row occupies the bits above the column field.
constexpr uint32_t pixelToChannel(int row, int col) {
constexpr Packing fields = packing();
return (row << fields.column_width) | col;
}
// Unpacks SiPixel raw FED data into digis and clusters on the GPU.
// Usage: fill a WordFedAppender on the host, launch makeClustersAsync on a
// CUDA stream, then (after the work completes) hand the products off with
// getResults()/getErrors(). The object is single-use per event and
// move-only products make it non-copyable/non-movable itself.
class SiPixelRawToClusterGPUKernel {
public:
// Host-side staging buffers for the raw 32-bit data words and the FED id of
// each word, stored in non-cached pinned memory so they can be copied to the
// device asynchronously.
class WordFedAppender {
public:
WordFedAppender();
~WordFedAppender() = default;
// Appends `length` words from `src` at offset `wordCounterGPU`, tagging
// each with `fedId`.
void initializeWordFed(int fedId, unsigned int wordCounterGPU, const cms_uint32_t* src, unsigned int length);
const unsigned int* word() const { return word_.get(); }
const unsigned char* fedId() const { return fedId_.get(); }
private:
cms::cuda::host::noncached::unique_ptr<unsigned int[]> word_;
cms::cuda::host::noncached::unique_ptr<unsigned char[]> fedId_;
};
SiPixelRawToClusterGPUKernel() = default;
~SiPixelRawToClusterGPUKernel() = default;
// Non-copyable and non-movable: holds per-event GPU products.
SiPixelRawToClusterGPUKernel(const SiPixelRawToClusterGPUKernel&) = delete;
SiPixelRawToClusterGPUKernel(SiPixelRawToClusterGPUKernel&&) = delete;
SiPixelRawToClusterGPUKernel& operator=(const SiPixelRawToClusterGPUKernel&) = delete;
SiPixelRawToClusterGPUKernel& operator=(SiPixelRawToClusterGPUKernel&&) = delete;
// Launches the unpacking/calibration/clustering work asynchronously on
// `stream`; `wordFed` holds wordCounter words from fedCounter FEDs.
// Results must be retrieved later via getResults()/getErrors().
void makeClustersAsync(const SiPixelFedCablingMapGPU* cablingMap,
const unsigned char* modToUnp,
const SiPixelGainForHLTonGPU* gains,
const WordFedAppender& wordFed,
PixelFormatterErrors&& errors,
const uint32_t wordCounter,
const uint32_t fedCounter,
bool useQualityInfo,
bool includeErrors,
bool debug,
cudaStream_t stream);
// Finalizes the digi/cluster counts and moves the products out.
// Must be called at most once, after the async work has completed.
std::pair<SiPixelDigisCUDA, SiPixelClustersCUDA> getResults() {
digis_d.setNModulesDigis(nModules_Clusters_h[0], nDigis);
clusters_d.setNClusters(nModules_Clusters_h[1]);
// need to explicitly deallocate while the associated CUDA
// stream is still alive
//
// technically the statement above is not true anymore now that
// the CUDA streams are cached within the cms::cuda::StreamCache, but it is
// still better to release as early as possible
nModules_Clusters_h.reset();
return std::make_pair(std::move(digis_d), std::move(clusters_d));
}
// Moves the per-digi error collection out; call at most once.
SiPixelDigiErrorsCUDA&& getErrors() { return std::move(digiErrors_d); }
private:
// Number of digis produced by the last makeClustersAsync call.
uint32_t nDigis = 0;
// Data to be put in the event
// Pinned host buffer holding [0] = module count and [1] = cluster count,
// filled by the async work and consumed in getResults().
cms::cuda::host::unique_ptr<uint32_t[]> nModules_Clusters_h;
SiPixelDigisCUDA digis_d;
SiPixelClustersCUDA clusters_d;
SiPixelDigiErrorsCUDA digiErrors_d;
};
// Clusterizer threshold configuration, see RecoLocalTracker/SiPixelClusterizer.
// All values are runtime constants and should be specified in the python _cfg.py.
struct ADCThreshold {
const int thePixelThreshold = 1000; // default pixel threshold, in electrons
const int theSeedThreshold = 1000; // seed threshold, in electrons (not used in this algorithm)
const float theClusterThreshold = 4000; // cluster threshold, in electrons
const int ConversionFactor = 65; // ADC-to-electron conversion factor
const int theStackADC_ = 255; // maximum ADC count for a stack layer
const int theFirstStack_ = 5; // index of the first stack layer
const double theElectronPerADCGain_ = 600; // ADC-to-electron conversion gain
};
} // namespace pixelgpudetails
#endif // RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h