Skip to content

Commit

Permalink
Integrate the comments from the upstream PRs (#442)
Browse files Browse the repository at this point in the history
Clean up the Patatrack code base following the comments received during the integration into the upstream release.

Currently tracks the changes introduced due to
   - cms-sw#29109: Patatrack integration - trivial changes (1/N)
   - cms-sw#29110: Patatrack integration - common tools (2/N)

List of changes:
 * Remove unused files
 * Fix compilation warnings
 * Fix AtomicPairCounter unit test
 * Rename the cudaCompat namespace to cms::cudacompat
 * Remove extra semicolon
 * Move SimpleVector and VecArray to the cms::cuda namespace
 * Add missing dependency
 * Move HistoContainer, AtomicPairCounter, prefixScan and radixSort to the cms::cuda namespace
 * Remove rule exception for HeterogeneousCore
 * Fix code rule violations:
    - replace using namespace cms::cuda in test/OneToManyAssoc_t.h .
    - add an exception for cudaCompat.h:
      cudaCompat relies on defining equivalent symbols to the CUDA
      intrinsics in the cms::cudacompat namespace, and pulling them in the
      global namespace when compiling device code without CUDA.
* Protect the headers to compile only with a CUDA compiler
  • Loading branch information
fwyzard committed Nov 16, 2020
1 parent 15d0d91 commit 0dd6cd4
Show file tree
Hide file tree
Showing 11 changed files with 40 additions and 40 deletions.
2 changes: 1 addition & 1 deletion CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h
Expand Up @@ -17,7 +17,7 @@ class TrackSoAT {

using Quality = trackQuality::Quality;
using hindex_type = uint16_t;
using HitContainer = OneToManyAssoc<hindex_type, S, 5 * S>;
using HitContainer = cms::cuda::OneToManyAssoc<hindex_type, S, 5 * S>;

// Always check quality is at least loose!
// CUDA does not support enums in __lgc ...
Expand Down
4 changes: 2 additions & 2 deletions RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h
Expand Up @@ -322,7 +322,7 @@ namespace BrokenLine {
std::cout << "CU5\n" << C_U << std::endl;
#endif
MatrixNplusONEd<N> I;
choleskyInversion::invert(C_U, I);
math::cholesky::invert(C_U, I);
// MatrixNplusONEd<N> I = C_U.inverse();
#ifdef CPP_DUMP
std::cout << "I5\n" << I << std::endl;
Expand Down Expand Up @@ -443,7 +443,7 @@ namespace BrokenLine {
std::cout << "CU4\n" << MatrixC_u(w, S, VarBeta) << std::endl;
#endif
MatrixNd<N> I;
choleskyInversion::invert(MatrixC_u(w, S, VarBeta), I);
math::cholesky::invert(MatrixC_u(w, S, VarBeta), I);
// MatrixNd<N> I=MatrixC_u(w,S,VarBeta).inverse();
#ifdef CPP_DUMP
std::cout << "I4\n" << I << std::endl;
Expand Down
6 changes: 3 additions & 3 deletions RecoPixelVertexing/PixelTrackFitting/interface/RiemannFit.h
Expand Up @@ -486,7 +486,7 @@ namespace Rfit {
printIt(&V, "circle_fit - V:");
cov_rad += scatter_cov_rad;
printIt(&cov_rad, "circle_fit - cov_rad:");
choleskyInversion::invert(cov_rad, G);
math::cholesky::invert(cov_rad, G);
// G = cov_rad.inverse();
renorm = G.sum();
G *= 1. / renorm;
Expand Down Expand Up @@ -889,11 +889,11 @@ namespace Rfit {

// Build A^T V-1 A, where V-1 is the covariance of only the Y components.
MatrixNd<N> Vy_inv;
choleskyInversion::invert(cov_with_ms, Vy_inv);
math::cholesky::invert(cov_with_ms, Vy_inv);
// MatrixNd<N> Vy_inv = cov_with_ms.inverse();
Eigen::Matrix<double, 2, 2> Cov_params = A * Vy_inv * A.transpose();
// Compute the Covariance Matrix of the fit parameters
choleskyInversion::invert(Cov_params, Cov_params);
math::cholesky::invert(Cov_params, Cov_params);

// Now Compute the Parameters in the form [2,1]
// The first component is q.
Expand Down
24 changes: 12 additions & 12 deletions RecoPixelVertexing/PixelTriplets/plugins/CAConstants.h
Expand Up @@ -6,8 +6,8 @@
#include <cuda_runtime.h>

#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h"
#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h"
#include "HeterogeneousCore/CUDAUtilities/interface/GPUVecArray.h"
#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"
#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"

// #define ONLY_PHICUT
Expand Down Expand Up @@ -48,21 +48,21 @@ namespace CAConstants {
using tindex_type = uint16_t; // for tuples

#ifndef ONLY_PHICUT
using CellNeighbors = GPU::VecArray<uint32_t, 36>;
using CellTracks = GPU::VecArray<tindex_type, 42>;
using CellNeighbors = cms::cuda::VecArray<uint32_t, 36>;
using CellTracks = cms::cuda::VecArray<tindex_type, 42>;
#else
using CellNeighbors = GPU::VecArray<uint32_t, 64>;
using CellTracks = GPU::VecArray<tindex_type, 64>;
using CellNeighbors = cms::cuda::VecArray<uint32_t, 64>;
using CellTracks = cms::cuda::VecArray<tindex_type, 64>;
#endif

using CellNeighborsVector = GPU::SimpleVector<CellNeighbors>;
using CellTracksVector = GPU::SimpleVector<CellTracks>;
using CellNeighborsVector = cms::cuda::SimpleVector<CellNeighbors>;
using CellTracksVector = cms::cuda::SimpleVector<CellTracks>;

using OuterHitOfCell = GPU::VecArray<uint32_t, maxCellsPerHit()>;
using TuplesContainer = OneToManyAssoc<hindex_type, maxTuples(), 5 * maxTuples()>;
using OuterHitOfCell = cms::cuda::VecArray<uint32_t, maxCellsPerHit()>;
using TuplesContainer = cms::cuda::OneToManyAssoc<hindex_type, maxTuples(), 5 * maxTuples()>;
using HitToTuple =
OneToManyAssoc<tindex_type, pixelGPUConstants::maxNumberOfHits, 4 * maxTuples()>; // 3.5 should be enough
using TupleMultiplicity = OneToManyAssoc<tindex_type, 8, maxTuples()>;
cms::cuda::OneToManyAssoc<tindex_type, pixelGPUConstants::maxNumberOfHits, 4 * maxTuples()>; // 3.5 should be enough
using TupleMultiplicity = cms::cuda::OneToManyAssoc<tindex_type, 8, maxTuples()>;

} // namespace CAConstants

Expand Down
Expand Up @@ -189,20 +189,20 @@ class CAHitNtupletGeneratorKernels {
uint32_t* device_nCells_ = nullptr;

unique_ptr<HitToTuple> device_hitToTuple_;
AtomicPairCounter* device_hitToTuple_apc_ = nullptr;
cms::cuda::AtomicPairCounter* device_hitToTuple_apc_ = nullptr;

AtomicPairCounter* device_hitTuple_apc_ = nullptr;
cms::cuda::AtomicPairCounter* device_hitTuple_apc_ = nullptr;

unique_ptr<TupleMultiplicity> device_tupleMultiplicity_;

uint8_t* device_tmws_;

unique_ptr<AtomicPairCounter::c_type[]> device_storage_;
unique_ptr<cms::cuda::AtomicPairCounter::c_type[]> device_storage_;
// params
Params const& m_params;
};

using CAHitNtupletGeneratorKernelsGPU = CAHitNtupletGeneratorKernels<cudaCompat::GPUTraits>;
using CAHitNtupletGeneratorKernelsCPU = CAHitNtupletGeneratorKernels<cudaCompat::CPUTraits>;
using CAHitNtupletGeneratorKernelsGPU = CAHitNtupletGeneratorKernels<cms::cudacompat::GPUTraits>;
using CAHitNtupletGeneratorKernelsCPU = CAHitNtupletGeneratorKernels<cms::cudacompat::CPUTraits>;

#endif // RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h
Expand Up @@ -24,13 +24,13 @@ void CAHitNtupletGeneratorKernelsCPU::allocateOnGPU(cudaStream_t stream) {
device_tupleMultiplicity_ = Traits::template make_unique<TupleMultiplicity>(stream);

auto storageSize =
3 + (std::max(TupleMultiplicity::wsSize(), HitToTuple::wsSize()) + sizeof(AtomicPairCounter::c_type)) /
sizeof(AtomicPairCounter::c_type);
3 + (std::max(TupleMultiplicity::wsSize(), HitToTuple::wsSize()) + sizeof(cms::cuda::AtomicPairCounter::c_type)) /
sizeof(cms::cuda::AtomicPairCounter::c_type);

device_storage_ = Traits::template make_unique<AtomicPairCounter::c_type[]>(storageSize, stream);
device_storage_ = Traits::template make_unique<cms::cuda::AtomicPairCounter::c_type[]>(storageSize, stream);

device_hitTuple_apc_ = (AtomicPairCounter*)device_storage_.get();
device_hitToTuple_apc_ = (AtomicPairCounter*)device_storage_.get() + 1;
device_hitTuple_apc_ = (cms::cuda::AtomicPairCounter*)device_storage_.get();
device_hitToTuple_apc_ = (cms::cuda::AtomicPairCounter*)device_storage_.get() + 1;
device_nCells_ = (uint32_t*)(device_storage_.get() + 2);
device_tmws_ = (uint8_t*)(device_storage_.get() + 3);

Expand All @@ -41,7 +41,7 @@ void CAHitNtupletGeneratorKernelsCPU::allocateOnGPU(cudaStream_t stream) {
#ifndef __CUDACC__
constexpr
#endif
(std::is_same<Traits, cudaCompat::GPUTraits>::value) {
(std::is_same<Traits, cms::cudacompat::GPUTraits>::value) {
cudaCheck(cudaMemsetAsync(device_nCells_, 0, sizeof(uint32_t), stream));
} else {
*device_nCells_ = 0;
Expand Down
Expand Up @@ -33,7 +33,7 @@ using HitContainer = pixelTrack::HitContainer;

__global__ void kernel_checkOverflows(HitContainer const *foundNtuplets,
CAConstants::TupleMultiplicity *tupleMultiplicity,
AtomicPairCounter *apc,
cms::cuda::AtomicPairCounter *apc,
GPUCACell const *__restrict__ cells,
uint32_t const *__restrict__ nCells,
CellNeighborsVector const *cellNeighbors,
Expand Down Expand Up @@ -190,8 +190,8 @@ __global__ void kernel_fastDuplicateRemover(GPUCACell const *__restrict__ cells,
}
}

__global__ void kernel_connect(AtomicPairCounter *apc1,
AtomicPairCounter *apc2, // just to zero them,
__global__ void kernel_connect(cms::cuda::AtomicPairCounter *apc1,
cms::cuda::AtomicPairCounter *apc2, // just to zero them,
GPUCACell::Hits const *__restrict__ hhp,
GPUCACell *cells,
uint32_t const *__restrict__ nCells,
Expand Down Expand Up @@ -268,7 +268,7 @@ __global__ void kernel_find_ntuplets(GPUCACell::Hits const *__restrict__ hhp,
uint32_t const *nCells,
CellTracksVector *cellTracks,
HitContainer *foundNtuplets,
AtomicPairCounter *apc,
cms::cuda::AtomicPairCounter *apc,
Quality *__restrict__ quality,
unsigned int minHitsPerNtuplet) {
// recursive: not obvious to widen
Expand Down
Expand Up @@ -8,7 +8,7 @@
#include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/Utilities/interface/EDGetToken.h"
#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h"
#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"

#include "CAHitNtupletGeneratorKernels.h"
#include "HelixFitOnGPU.h"
Expand Down
8 changes: 4 additions & 4 deletions RecoPixelVertexing/PixelTriplets/plugins/GPUCACell.h
Expand Up @@ -10,8 +10,8 @@
#include <cuda_runtime.h>

#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h"
#include "HeterogeneousCore/CUDAUtilities/interface/GPUVecArray.h"
#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"
#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
#include "RecoPixelVertexing/PixelTriplets/interface/CircleEq.h"
#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h"
Expand All @@ -31,7 +31,7 @@ class GPUCACell {
using Hits = TrackingRecHit2DSOAView;
using hindex_type = Hits::hindex_type;

using TmpTuple = GPU::VecArray<uint32_t, 6>;
using TmpTuple = cms::cuda::VecArray<uint32_t, 6>;

using HitContainer = pixelTrack::HitContainer;
using Quality = trackQuality::Quality;
Expand Down Expand Up @@ -246,7 +246,7 @@ class GPUCACell {
GPUCACell* __restrict__ cells,
CellTracksVector& cellTracks,
HitContainer& foundNtuplets,
AtomicPairCounter& apc,
cms::cuda::AtomicPairCounter& apc,
Quality* __restrict__ quality,
TmpTuple& tmpNtuplet,
const unsigned int minHitsPerNtuplet,
Expand Down
2 changes: 1 addition & 1 deletion RecoPixelVertexing/PixelTriplets/plugins/gpuFishbone.h
Expand Up @@ -9,7 +9,7 @@

#include "DataFormats/Math/interface/approx_atan2.h"
#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h"
#include "HeterogeneousCore/CUDAUtilities/interface/GPUVecArray.h"
#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"

#include "GPUCACell.h"
Expand Down
Expand Up @@ -9,7 +9,7 @@

#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
#include "DataFormats/Math/interface/approx_atan2.h"
#include "HeterogeneousCore/CUDAUtilities/interface/GPUVecArray.h"
#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"

#include "CAConstants.h"
Expand Down

0 comments on commit 0dd6cd4

Please sign in to comment.