Skip to content

Commit

Permalink
Reimplement the ECAL multifit with CUDA, for an HLT-like configuration (
Browse files Browse the repository at this point in the history
cms-sw#335)

Reimplementation of the original cpu-based ECAL multifit to run on Nvidia GPUs with CUDA,
for an HLT-like configuration, where regression matrices are fixed at 10x10 (no dynamic pedestal, etc...).

The main computation type is float by default, configurable at compile time, and the minimization (Cholesky + solvers, etc...) is implemented using the Eigen library.
The timing computation is implemented, but is not run by default at HLT.

The implementation:
  - uses one CUDA stream per EDM stream;
  - the EventSetup conditions are updated on the GPU only when they change, via the CUDAESProduct mechanism;
  - only the per-event data is transferred for each event;
  - the results are optionally copied back to the host and synchronised by the produce() method.

A simple tool is available to validate cpu vs gpu results.

Known issues and to do list:
  - add a module to convert from new format to the legacy format;
  - make use of the CUDAService framework for the device selection, stream handling and memory allocation;
  - investigate some instabilities in the Cholesky decomposition vs original cpu version, specifically [for fnnls](https://github.com/cms-patatrack/cmssw/pull/335/files#diff-ed446c49128ac6dc6f45eeebab079613R70) causing rare, but noticeable discrepancies between cpu and gpu versions.
  • Loading branch information
vkhristenko authored and fwyzard committed Jun 25, 2019
1 parent 957e184 commit 0b72281
Show file tree
Hide file tree
Showing 44 changed files with 5,285 additions and 0 deletions.
7 changes: 7 additions & 0 deletions CUDADataFormats/EcalRecHitSoA/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<use name="DataFormats/Common"/>
<use name="DataFormats/EcalDigi"/>
<use name="HeterogeneousCore/CUDAUtilities"/>

<export>
<lib name="1"/>
</export>
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#ifndef CUDADataFormats_EcalRecHitSoA_interface_EcalUncalibratedRecHit_soa_h
#define CUDADataFormats_EcalRecHitSoA_interface_EcalUncalibratedRecHit_soa_h

#include <vector>
#include <array>

#include "DataFormats/EcalDigi/interface/EcalDataFrame.h"

#include "CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h"
#include "HeterogeneousCore/CUDAUtilities/interface/CUDAHostAllocator.h"

namespace ecal {

namespace Tag {

    // Empty tag types used purely for compile-time selection of the
    // storage kind in type_wrapper below.
    struct soa {};   // selects std::vector storage
    struct ptr {};   // selects raw-pointer storage

}  // namespace Tag

// Maps an element type T and a storage tag L onto the concrete member type
// used for one column of a collection.
//
// Primary template (any tag other than Tag::ptr, Tag::soa by default):
// a std::vector<T>.
//
// A variant backed by pinned host memory is currently disabled:
//   #ifndef ECAL_MULTIFIT_DONOT_USE_PINNED_MEM
//     using type = std::vector<T, CUDAHostAllocator<T>>;
//   #else
//     using type = std::vector<T>;
//   #endif
template<class T, class L = Tag::soa>
struct type_wrapper {
    using type = std::vector<T>;
};

// Specialization for Tag::ptr: a plain T*.
template<class T>
struct type_wrapper<T, Tag::ptr> {
    using type = T*;
};

template<typename L = Tag::soa>
struct UncalibratedRecHit {
UncalibratedRecHit() = default;
UncalibratedRecHit(const UncalibratedRecHit&) = default;
UncalibratedRecHit& operator=(const UncalibratedRecHit&) = default;

UncalibratedRecHit(UncalibratedRecHit&&) = default;
UncalibratedRecHit& operator=(UncalibratedRecHit&&) = default;

// TODO: std::array causes root's dictionary problems
typename type_wrapper<reco::ComputationScalarType, L>::type amplitudesAll;
// typename type_wrapper<std::array<reco::ComputationScalarType,
// EcalDataFrame::MAXSAMPLES>, L>::type amplitudesAll;
typename type_wrapper<reco::StorageScalarType, L>::type amplitude;
typename type_wrapper<reco::StorageScalarType, L>::type chi2;
typename type_wrapper<reco::StorageScalarType, L>::type pedestal;
typename type_wrapper<reco::StorageScalarType, L>::type jitter;
typename type_wrapper<reco::StorageScalarType, L>::type jitterError;
typename type_wrapper<uint32_t, L>::type did;
typename type_wrapper<uint32_t, L>::type flags;

template<typename U = L>
typename std::enable_if<std::is_same<U, Tag::soa>::value, void>::type
resize(size_t size) {
amplitudesAll.resize(size * EcalDataFrame::MAXSAMPLES);
amplitude.resize(size);
pedestal.resize(size);
chi2.resize(size);
did.resize(size);
flags.resize(size);
jitter.resize(size);
jitterError.resize(size);
}
};

using SoAUncalibratedRecHitCollection = UncalibratedRecHit<Tag::soa>;

}

#endif // CUDADataFormats_EcalRecHitSoA_interface_EcalUncalibratedRecHit_soa_h
11 changes: 11 additions & 0 deletions CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef CUDADataFormats_EcalRecHitSoA_interface_RecoTypes
#define CUDADataFormats_EcalRecHitSoA_interface_RecoTypes

namespace ecal {
    namespace reco {

        // Scalar type named for computations; float here.
        using ComputationScalarType = float;

        // Scalar type named for stored values; float here.
        using StorageScalarType = float;

    }  // namespace reco
}  // namespace ecal

#endif
2 changes: 2 additions & 0 deletions CUDADataFormats/EcalRecHitSoA/src/classes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#include "DataFormats/Common/interface/Wrapper.h"
#include "CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit_soa.h"
15 changes: 15 additions & 0 deletions CUDADataFormats/EcalRecHitSoA/src/classes_def.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<lcgdict>
<!-- Tag types of namespace ecal::Tag. Only "soa" and "ptr" are declared there;
     the previously listed "ecal::Tag::aos" does not name any declared type. -->
<class name="ecal::Tag::soa"/>
<class name="ecal::Tag::ptr"/>

<!-- Disabled: vectors using the pinned host memory allocator.
<class name="std::vector<double, CUDAHostAllocator<double> >"/>
<class name="std::vector<float, CUDAHostAllocator<float> >"/>
<class name="std::vector<unsigned int, CUDAHostAllocator<unsigned int> >" />
-->

<!-- Disabled: fixed-size arrays of samples.
<class name="std::array<double, 10>" />
<class name="std::array<float, 10>" /> -->

<!-- The collection itself and its edm::Wrapper. -->
<class name="ecal::UncalibratedRecHit<ecal::Tag::soa>"/>
<class name="edm::Wrapper<ecal::UncalibratedRecHit<ecal::Tag::soa> >"/>
</lcgdict>
5 changes: 5 additions & 0 deletions RecoLocalCalo/EcalRecAlgos/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
<use name="root"/>
<use name="rootminuit"/>
<use name="eigen"/>
<use name="cuda"/>
<use name="CUDADataFormats/EcalRecHitSoA"/>
<use name="HeterogeneousCore/CUDAUtilities"/>
<use name="cuda-api-wrappers"/>
<use name="HeterogeneousCore/CUDACore"/>

<export>
<lib name="1"/>
Expand Down
48 changes: 48 additions & 0 deletions RecoLocalCalo/EcalRecAlgos/interface/Common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#ifndef RecoLocalCalo_EcalRecAlgos_interface_Common_h
#define RecoLocalCalo_EcalRecAlgos_interface_Common_h

#include <cstdint>
#include <cmath>
#include <cassert>
#include <chrono>
#include <iostream>

#include <cuda.h>
#include <cuda_runtime.h>

// a workaround for std::abs not being a constexpr function
namespace ecal {

// Returns the absolute value of `value` and is usable in constant
// expressions.
//
// Implemented with a plain conditional rather than
// std::max(value, -value):
//   - no dependency on <algorithm>;
//   - works for integer types narrower than int, where `-value` is promoted
//     to int and std::max cannot deduce a single argument type;
//   - an unsigned `value` is returned unchanged instead of being compared
//     against its wrapped-around negation.
//
// T must be value-initialisable to its zero and support `<` and unary `-`.
// As with std::abs, the most negative value of a signed integer type has no
// representable result.
template<typename T>
constexpr T abs(T const& value) {
    return value < T{} ? static_cast<T>(-value) : value;
}

// temporary
namespace mgpa {

// Low 12 bits of a raw sample word.
constexpr int adc(uint16_t sample) { return sample & 0xfff; }
// Bits 12 and 13 of a raw sample word.
constexpr int gainId(uint16_t sample) { return (sample>>12) & 0x3; }

}

}

// Scope timer: records the time at construction and, on destruction, prints
// the stored message followed by the elapsed time to std::cout.
//
// T is the std::chrono duration type the elapsed time is converted to before
// its tick count is printed (e.g. std::chrono::milliseconds).
template<typename T>
struct DurationMeasurer {
    // Stores a copy of `msg` and starts the measurement.
    DurationMeasurer(std::string const& msg)
        : msg_{msg}
        , start_{std::chrono::high_resolution_clock::now()}
    {}

    // Stops the measurement and reports it; the stream is flushed.
    ~DurationMeasurer() {
        // take the time difference before doing any output
        auto const elapsed = std::chrono::high_resolution_clock::now() - start_;
        auto const ticks = std::chrono::duration_cast<T>(elapsed).count();

        std::cout << msg_ << "\nduration = " << ticks << std::endl;
    }

private:
    std::string msg_;
    std::chrono::high_resolution_clock::time_point start_;
};

#endif
Loading

0 comments on commit 0b72281

Please sign in to comment.