Integrate the comments from the upstream PRs (#442)

Clean up the Patatrack code base following the comments received during the integration into the upstream release. Currently tracks the changes introduced due to - cms-sw#29109: Patatrack integration - trivial changes (1/N) - cms-sw#29110: Patatrack integration - common tools (2/N) List of changes: * Remove unused files * Fix compilation warnings * Fix AtomicPairCounter unit test * Rename the cudaCompat namespace to cms::cudacompat * Remove extra semicolon * Move SimpleVector and VecArray to the cms::cuda namespace * Add missing dependency * Move HistoContainer, AtomicPairCounter, prefixScan and radixSort to the cms::cuda namespace * Remove rule exception for HeterogeneousCore * Fix code rule violations: - replace using namespace cms::cuda in test/OneToManyAssoc_t.h . - add an exception for cudaCompat.h: cudaCompat relies on defining equivalent symbols to the CUDA intrinsics in the cms::cudacompat namespace, and pulling them in the global namespace when compiling device code without CUDA. * Protect the headers to compile only with a CUDA compiler
cms-patatrack · Nov 16, 2020 · 0dd6cd4 · 0dd6cd4
1 parent 15d0d91
commit 0dd6cd4
Show file tree

Hide file tree

Showing 11 changed files with 40 additions and 40 deletions.
diff --git a/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h b/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h
@@ -17,7 +17,7 @@ class TrackSoAT {
 
   using Quality = trackQuality::Quality;
   using hindex_type = uint16_t;
-  using HitContainer = OneToManyAssoc<hindex_type, S, 5 * S>;
+  using HitContainer = cms::cuda::OneToManyAssoc<hindex_type, S, 5 * S>;
 
   // Always check quality is at least loose!
   // CUDA does not support enums  in __lgc ...

diff --git a/RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h b/RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h
@@ -322,7 +322,7 @@ namespace BrokenLine {
     std::cout << "CU5\n" << C_U << std::endl;
 #endif
     MatrixNplusONEd<N> I;
-    choleskyInversion::invert(C_U, I);
+    math::cholesky::invert(C_U, I);
     // MatrixNplusONEd<N> I = C_U.inverse();
 #ifdef CPP_DUMP
     std::cout << "I5\n" << I << std::endl;
@@ -443,7 +443,7 @@ namespace BrokenLine {
     std::cout << "CU4\n" << MatrixC_u(w, S, VarBeta) << std::endl;
 #endif
     MatrixNd<N> I;
-    choleskyInversion::invert(MatrixC_u(w, S, VarBeta), I);
+    math::cholesky::invert(MatrixC_u(w, S, VarBeta), I);
     //    MatrixNd<N> I=MatrixC_u(w,S,VarBeta).inverse();
 #ifdef CPP_DUMP
     std::cout << "I4\n" << I << std::endl;

diff --git a/RecoPixelVertexing/PixelTrackFitting/interface/RiemannFit.h b/RecoPixelVertexing/PixelTrackFitting/interface/RiemannFit.h
@@ -486,7 +486,7 @@ namespace Rfit {
       printIt(&V, "circle_fit - V:");
       cov_rad += scatter_cov_rad;
       printIt(&cov_rad, "circle_fit - cov_rad:");
-      choleskyInversion::invert(cov_rad, G);
+      math::cholesky::invert(cov_rad, G);
       // G = cov_rad.inverse();
       renorm = G.sum();
       G *= 1. / renorm;
@@ -889,11 +889,11 @@ namespace Rfit {
 
     // Build A^T V-1 A, where V-1 is the covariance of only the Y components.
     MatrixNd<N> Vy_inv;
-    choleskyInversion::invert(cov_with_ms, Vy_inv);
+    math::cholesky::invert(cov_with_ms, Vy_inv);
     // MatrixNd<N> Vy_inv = cov_with_ms.inverse();
     Eigen::Matrix<double, 2, 2> Cov_params = A * Vy_inv * A.transpose();
     // Compute the Covariance Matrix of the fit parameters
-    choleskyInversion::invert(Cov_params, Cov_params);
+    math::cholesky::invert(Cov_params, Cov_params);
 
     // Now Compute the Parameters in the form [2,1]
     // The first component is q.

diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAConstants.h b/RecoPixelVertexing/PixelTriplets/plugins/CAConstants.h
@@ -6,8 +6,8 @@
 #include <cuda_runtime.h>
 
 #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/GPUVecArray.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
 
 // #define ONLY_PHICUT
@@ -48,21 +48,21 @@ namespace CAConstants {
   using tindex_type = uint16_t;  //  for tuples
 
 #ifndef ONLY_PHICUT
-  using CellNeighbors = GPU::VecArray<uint32_t, 36>;
-  using CellTracks = GPU::VecArray<tindex_type, 42>;
+  using CellNeighbors = cms::cuda::VecArray<uint32_t, 36>;
+  using CellTracks = cms::cuda::VecArray<tindex_type, 42>;
 #else
-  using CellNeighbors = GPU::VecArray<uint32_t, 64>;
-  using CellTracks = GPU::VecArray<tindex_type, 64>;
+  using CellNeighbors = cms::cuda::VecArray<uint32_t, 64>;
+  using CellTracks = cms::cuda::VecArray<tindex_type, 64>;
 #endif
 
-  using CellNeighborsVector = GPU::SimpleVector<CellNeighbors>;
-  using CellTracksVector = GPU::SimpleVector<CellTracks>;
+  using CellNeighborsVector = cms::cuda::SimpleVector<CellNeighbors>;
+  using CellTracksVector = cms::cuda::SimpleVector<CellTracks>;
 
-  using OuterHitOfCell = GPU::VecArray<uint32_t, maxCellsPerHit()>;
-  using TuplesContainer = OneToManyAssoc<hindex_type, maxTuples(), 5 * maxTuples()>;
+  using OuterHitOfCell = cms::cuda::VecArray<uint32_t, maxCellsPerHit()>;
+  using TuplesContainer = cms::cuda::OneToManyAssoc<hindex_type, maxTuples(), 5 * maxTuples()>;
   using HitToTuple =
-      OneToManyAssoc<tindex_type, pixelGPUConstants::maxNumberOfHits, 4 * maxTuples()>;  // 3.5 should be enough
-  using TupleMultiplicity = OneToManyAssoc<tindex_type, 8, maxTuples()>;
+      cms::cuda::OneToManyAssoc<tindex_type, pixelGPUConstants::maxNumberOfHits, 4 * maxTuples()>;  // 3.5 should be enough
+  using TupleMultiplicity = cms::cuda::OneToManyAssoc<tindex_type, 8, maxTuples()>;
 
 }  // namespace CAConstants
 

diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.h b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.h
@@ -189,20 +189,20 @@ class CAHitNtupletGeneratorKernels {
   uint32_t* device_nCells_ = nullptr;
 
   unique_ptr<HitToTuple> device_hitToTuple_;
-  AtomicPairCounter* device_hitToTuple_apc_ = nullptr;
+  cms::cuda::AtomicPairCounter* device_hitToTuple_apc_ = nullptr;
 
-  AtomicPairCounter* device_hitTuple_apc_ = nullptr;
+  cms::cuda::AtomicPairCounter* device_hitTuple_apc_ = nullptr;
 
   unique_ptr<TupleMultiplicity> device_tupleMultiplicity_;
 
   uint8_t* device_tmws_;
 
-  unique_ptr<AtomicPairCounter::c_type[]> device_storage_;
+  unique_ptr<cms::cuda::AtomicPairCounter::c_type[]> device_storage_;
   // params
   Params const& m_params;
 };
 
-using CAHitNtupletGeneratorKernelsGPU = CAHitNtupletGeneratorKernels<cudaCompat::GPUTraits>;
-using CAHitNtupletGeneratorKernelsCPU = CAHitNtupletGeneratorKernels<cudaCompat::CPUTraits>;
+using CAHitNtupletGeneratorKernelsGPU = CAHitNtupletGeneratorKernels<cms::cudacompat::GPUTraits>;
+using CAHitNtupletGeneratorKernelsCPU = CAHitNtupletGeneratorKernels<cms::cudacompat::CPUTraits>;
 
 #endif  // RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h
diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.h b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.h
@@ -24,13 +24,13 @@ void CAHitNtupletGeneratorKernelsCPU::allocateOnGPU(cudaStream_t stream) {
   device_tupleMultiplicity_ = Traits::template make_unique<TupleMultiplicity>(stream);
 
   auto storageSize =
-      3 + (std::max(TupleMultiplicity::wsSize(), HitToTuple::wsSize()) + sizeof(AtomicPairCounter::c_type)) /
-              sizeof(AtomicPairCounter::c_type);
+      3 + (std::max(TupleMultiplicity::wsSize(), HitToTuple::wsSize()) + sizeof(cms::cuda::AtomicPairCounter::c_type)) /
+              sizeof(cms::cuda::AtomicPairCounter::c_type);
 
-  device_storage_ = Traits::template make_unique<AtomicPairCounter::c_type[]>(storageSize, stream);
+  device_storage_ = Traits::template make_unique<cms::cuda::AtomicPairCounter::c_type[]>(storageSize, stream);
 
-  device_hitTuple_apc_ = (AtomicPairCounter*)device_storage_.get();
-  device_hitToTuple_apc_ = (AtomicPairCounter*)device_storage_.get() + 1;
+  device_hitTuple_apc_ = (cms::cuda::AtomicPairCounter*)device_storage_.get();
+  device_hitToTuple_apc_ = (cms::cuda::AtomicPairCounter*)device_storage_.get() + 1;
   device_nCells_ = (uint32_t*)(device_storage_.get() + 2);
   device_tmws_ = (uint8_t*)(device_storage_.get() + 3);
 
@@ -41,7 +41,7 @@ void CAHitNtupletGeneratorKernelsCPU::allocateOnGPU(cudaStream_t stream) {
 #ifndef __CUDACC__
       constexpr
 #endif
-      (std::is_same<Traits, cudaCompat::GPUTraits>::value) {
+      (std::is_same<Traits, cms::cudacompat::GPUTraits>::value) {
     cudaCheck(cudaMemsetAsync(device_nCells_, 0, sizeof(uint32_t), stream));
   } else {
     *device_nCells_ = 0;

diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h
@@ -33,7 +33,7 @@ using HitContainer = pixelTrack::HitContainer;
 
 __global__ void kernel_checkOverflows(HitContainer const *foundNtuplets,
                                       CAConstants::TupleMultiplicity *tupleMultiplicity,
-                                      AtomicPairCounter *apc,
+                                      cms::cuda::AtomicPairCounter *apc,
                                       GPUCACell const *__restrict__ cells,
                                       uint32_t const *__restrict__ nCells,
                                       CellNeighborsVector const *cellNeighbors,
@@ -190,8 +190,8 @@ __global__ void kernel_fastDuplicateRemover(GPUCACell const *__restrict__ cells,
   }
 }
 
-__global__ void kernel_connect(AtomicPairCounter *apc1,
-                               AtomicPairCounter *apc2,  // just to zero them,
+__global__ void kernel_connect(cms::cuda::AtomicPairCounter *apc1,
+                               cms::cuda::AtomicPairCounter *apc2,  // just to zero them,
                                GPUCACell::Hits const *__restrict__ hhp,
                                GPUCACell *cells,
                                uint32_t const *__restrict__ nCells,
@@ -268,7 +268,7 @@ __global__ void kernel_find_ntuplets(GPUCACell::Hits const *__restrict__ hhp,
                                      uint32_t const *nCells,
                                      CellTracksVector *cellTracks,
                                      HitContainer *foundNtuplets,
-                                     AtomicPairCounter *apc,
+                                     cms::cuda::AtomicPairCounter *apc,
                                      Quality *__restrict__ quality,
                                      unsigned int minHitsPerNtuplet) {
   // recursive: not obvious to widen

diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.h b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.h
@@ -8,7 +8,7 @@
 #include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h"
 #include "FWCore/ParameterSet/interface/ParameterSet.h"
 #include "FWCore/Utilities/interface/EDGetToken.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"
 
 #include "CAHitNtupletGeneratorKernels.h"
 #include "HelixFitOnGPU.h"

diff --git a/RecoPixelVertexing/PixelTriplets/plugins/GPUCACell.h b/RecoPixelVertexing/PixelTriplets/plugins/GPUCACell.h
@@ -10,8 +10,8 @@
 #include <cuda_runtime.h>
 
 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/GPUVecArray.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
 #include "RecoPixelVertexing/PixelTriplets/interface/CircleEq.h"
 #include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h"
@@ -31,7 +31,7 @@ class GPUCACell {
   using Hits = TrackingRecHit2DSOAView;
   using hindex_type = Hits::hindex_type;
 
-  using TmpTuple = GPU::VecArray<uint32_t, 6>;
+  using TmpTuple = cms::cuda::VecArray<uint32_t, 6>;
 
   using HitContainer = pixelTrack::HitContainer;
   using Quality = trackQuality::Quality;
@@ -246,7 +246,7 @@ class GPUCACell {
                                        GPUCACell* __restrict__ cells,
                                        CellTracksVector& cellTracks,
                                        HitContainer& foundNtuplets,
-                                       AtomicPairCounter& apc,
+                                       cms::cuda::AtomicPairCounter& apc,
                                        Quality* __restrict__ quality,
                                        TmpTuple& tmpNtuplet,
                                        const unsigned int minHitsPerNtuplet,

diff --git a/RecoPixelVertexing/PixelTriplets/plugins/gpuFishbone.h b/RecoPixelVertexing/PixelTriplets/plugins/gpuFishbone.h
@@ -9,7 +9,7 @@
 
 #include "DataFormats/Math/interface/approx_atan2.h"
 #include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/GPUVecArray.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
 
 #include "GPUCACell.h"

diff --git a/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoubletsAlgos.h b/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoubletsAlgos.h
@@ -9,7 +9,7 @@
 
 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
 #include "DataFormats/Math/interface/approx_atan2.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/GPUVecArray.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
 
 #include "CAConstants.h"