Skip to content

Commit

Permalink
Integrate the comments from the upstream PRs (#442)
Browse files Browse the repository at this point in the history
Clean up the Patatrack code base following the comments received during the integration into the upstream release.

Currently tracks the changes introduced due to
   - cms-sw#29109: Patatrack integration - trivial changes (1/N)
   - cms-sw#29110: Patatrack integration - common tools (2/N)

List of changes:
 * Remove unused files
 * Fix compilation warnings
 * Fix AtomicPairCounter unit test
 * Rename the cudaCompat namespace to cms::cudacompat
 * Remove extra semicolon
 * Move SimpleVector and VecArray to the cms::cuda namespace
 * Add missing dependency
 * Move HistoContainer, AtomicPairCounter, prefixScan and radixSort to the cms::cuda namespace
 * Remove rule exception for HeterogeneousCore
 * Fix code rule violations:
    - replace using namespace cms::cuda in test/OneToManyAssoc_t.h .
    - add an exception for cudaCompat.h:
      cudaCompat relies on defining equivalent symbols to the CUDA
      intrinsics in the cms::cudacompat namespace, and pulling them in the
      global namespace when compiling device code without CUDA.
* Protect the headers to compile only with a CUDA compiler
  • Loading branch information
fwyzard committed Nov 16, 2020
1 parent a8e3595 commit 0187906
Show file tree
Hide file tree
Showing 10 changed files with 26 additions and 26 deletions.
14 changes: 7 additions & 7 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h
Expand Up @@ -4,7 +4,7 @@
#include "DataFormats/SiPixelDigi/interface/PixelErrors.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h"
#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"

#include <cuda_runtime.h>

Expand All @@ -21,20 +21,20 @@ class SiPixelDigiErrorsCUDA {

const PixelFormatterErrors& formatterErrors() const { return formatterErrors_h; }

GPU::SimpleVector<PixelErrorCompact>* error() { return error_d.get(); }
GPU::SimpleVector<PixelErrorCompact> const* error() const { return error_d.get(); }
GPU::SimpleVector<PixelErrorCompact> const* c_error() const { return error_d.get(); }
cms::cuda::SimpleVector<PixelErrorCompact>* error() { return error_d.get(); }
cms::cuda::SimpleVector<PixelErrorCompact> const* error() const { return error_d.get(); }
cms::cuda::SimpleVector<PixelErrorCompact> const* c_error() const { return error_d.get(); }

using HostDataError =
std::pair<GPU::SimpleVector<PixelErrorCompact>, cms::cuda::host::unique_ptr<PixelErrorCompact[]>>;
std::pair<cms::cuda::SimpleVector<PixelErrorCompact>, cms::cuda::host::unique_ptr<PixelErrorCompact[]>>;
HostDataError dataErrorToHostAsync(cudaStream_t stream) const;

void copyErrorToHostAsync(cudaStream_t stream);

private:
cms::cuda::device::unique_ptr<PixelErrorCompact[]> data_d;
cms::cuda::device::unique_ptr<GPU::SimpleVector<PixelErrorCompact>> error_d;
cms::cuda::host::unique_ptr<GPU::SimpleVector<PixelErrorCompact>> error_h;
cms::cuda::device::unique_ptr<cms::cuda::SimpleVector<PixelErrorCompact>> error_d;
cms::cuda::host::unique_ptr<cms::cuda::SimpleVector<PixelErrorCompact>> error_h;
PixelFormatterErrors formatterErrors_h;
};

Expand Down
6 changes: 3 additions & 3 deletions CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc
Expand Up @@ -9,13 +9,13 @@

SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream)
: formatterErrors_h(std::move(errors)) {
error_d = cms::cuda::make_device_unique<GPU::SimpleVector<PixelErrorCompact>>(stream);
error_d = cms::cuda::make_device_unique<cms::cuda::SimpleVector<PixelErrorCompact>>(stream);
data_d = cms::cuda::make_device_unique<PixelErrorCompact[]>(maxFedWords, stream);

cms::cuda::memsetAsync(data_d, 0x00, maxFedWords, stream);

error_h = cms::cuda::make_host_unique<GPU::SimpleVector<PixelErrorCompact>>(stream);
GPU::make_SimpleVector(error_h.get(), maxFedWords, data_d.get());
error_h = cms::cuda::make_host_unique<cms::cuda::SimpleVector<PixelErrorCompact>>(stream);
cms::cuda::make_SimpleVector(error_h.get(), maxFedWords, data_d.get());
assert(error_h->empty());
assert(error_h->capacity() == static_cast<int>(maxFedWords));

Expand Down
Expand Up @@ -88,7 +88,7 @@ TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(uint32_t nH
#ifndef __CUDACC__
constexpr
#endif
(std::is_same<Traits, cudaCompat::GPUTraits>::value) {
(std::is_same<Traits, cms::cudacompat::GPUTraits>::value) {
cms::cuda::copyAsync(m_view, view, stream);
} else {
m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version
Expand Down Expand Up @@ -135,16 +135,16 @@ TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(uint32_t nH
#ifndef __CUDACC__
constexpr
#endif
(std::is_same<Traits, cudaCompat::GPUTraits>::value) {
(std::is_same<Traits, cms::cudacompat::GPUTraits>::value) {
cms::cuda::copyAsync(m_view, view, stream);
} else {
m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version
}
}

using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous<cudaCompat::GPUTraits>;
using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous<cudaCompat::GPUTraits>;
using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous<cudaCompat::CPUTraits>;
using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous<cudaCompat::HostTraits>;
using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits>;
using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits>;
using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous<cms::cudacompat::CPUTraits>;
using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous<cms::cudacompat::HostTraits>;

#endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
Expand Up @@ -17,7 +17,7 @@ class TrackingRecHit2DSOAView {
static constexpr uint32_t maxHits() { return gpuClustering::MaxNumClusters; }
using hindex_type = uint16_t; // if above is <=2^16

using Hist = HistoContainer<int16_t, 128, gpuClustering::MaxNumClusters, 8 * sizeof(int16_t), uint16_t, 10>;
using Hist = cms::cuda::HistoContainer<int16_t, 128, gpuClustering::MaxNumClusters, 8 * sizeof(int16_t), uint16_t, 10>;

using AverageGeometry = phase1PixelTopology::AverageGeometry;

Expand Down
Expand Up @@ -28,7 +28,7 @@ class SiPixelDigiErrorsSoAFromCUDA : public edm::stream::EDProducer<edm::Externa
edm::EDPutTokenT<SiPixelDigiErrorsSoA> digiErrorPutToken_;

cms::cuda::host::unique_ptr<PixelErrorCompact[]> data_;
GPU::SimpleVector<PixelErrorCompact> error_;
cms::cuda::SimpleVector<PixelErrorCompact> error_;
const PixelFormatterErrors* formatterErrors_ = nullptr;
};

Expand Down Expand Up @@ -70,7 +70,7 @@ void SiPixelDigiErrorsSoAFromCUDA::produce(edm::Event& iEvent, const edm::EventS
// use cudaMallocHost without a GPU...
iEvent.emplace(digiErrorPutToken_, error_.size(), error_.data(), formatterErrors_);

error_ = GPU::make_SimpleVector<PixelErrorCompact>(0, nullptr);
error_ = cms::cuda::make_SimpleVector<PixelErrorCompact>(0, nullptr);
data_.reset();
formatterErrors_ = nullptr;
}
Expand Down
Expand Up @@ -365,7 +365,7 @@ namespace pixelgpudetails {
uint32_t *pdigi,
uint32_t *rawIdArr,
uint16_t *moduleId,
GPU::SimpleVector<PixelErrorCompact> *err,
cms::cuda::SimpleVector<PixelErrorCompact> *err,
bool useQualityInfo,
bool includeErrors,
bool debug) {
Expand Down Expand Up @@ -491,8 +491,8 @@ namespace pixelgpudetails {
}

__shared__ uint32_t ws[32];
blockPrefixScan(moduleStart + 1, moduleStart + 1, 1024, ws);
blockPrefixScan(moduleStart + 1025, moduleStart + 1025, gpuClustering::MaxNumModules - 1024, ws);
cms::cuda::blockPrefixScan(moduleStart + 1, moduleStart + 1, 1024, ws);
cms::cuda::blockPrefixScan(moduleStart + 1025, moduleStart + 1025, gpuClustering::MaxNumModules - 1024, ws);

for (int i = first + 1025, iend = gpuClustering::MaxNumModules + 1; i < iend; i += blockDim.x) {
moduleStart[i] += moduleStart[1024];
Expand Down
Expand Up @@ -8,7 +8,7 @@
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h"
#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
#include "FWCore/Utilities/interface/typedefs.h"
#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h"
#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"
#include "DataFormats/SiPixelDigi/interface/PixelErrors.h"
Expand Down
Expand Up @@ -89,7 +89,7 @@ namespace gpuClustering {

// renumber
__shared__ uint16_t ws[32];
blockPrefixScan(newclusId, nclus, ws);
cms::cuda::blockPrefixScan(newclusId, nclus, ws);

assert(nclus >= newclusId[nclus - 1]);

Expand Down
Expand Up @@ -80,7 +80,7 @@ namespace gpuClustering {
//init hist (ymax=416 < 512 : 9bits)
constexpr uint32_t maxPixInModule = 4000;
constexpr auto nbins = phase1PixelTopology::numColsInModule + 2; //2+2;
using Hist = HistoContainer<uint16_t, nbins, maxPixInModule, 9, uint16_t>;
using Hist = cms::cuda::HistoContainer<uint16_t, nbins, maxPixInModule, 9, uint16_t>;
__shared__ Hist hist;
__shared__ typename Hist::Counter ws[32];
for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) {
Expand Down
Expand Up @@ -221,7 +221,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv
SiPixelDigisCUDA::DeviceConstView digiView{xx_.data(), yy_.data(), adc_.data(), moduleInd_.data(), clus_.data()};
assert(digiView.adc(0) != 0);
// not needed...
cudaCompat::resetGrid();
cms::cudacompat::resetGrid();
// we run on blockId.x==0
gpuPixelRecHits::getHits(&cpeView, &bsHost, &digiView, ndigi, &clusterView, output->view());
for (auto h = fc; h < lc; ++h)
Expand Down

0 comments on commit 0187906

Please sign in to comment.