Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #40605 from fwyzard/CUDATestDeviceLib
Add a test for CUDA library build rules
- Loading branch information
Showing
41 changed files
with
1,416 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
<iftool name="cuda-gcc-support"> | ||
<use name="cuda"/> | ||
<export> | ||
<lib name="1"/> | ||
</export> | ||
</iftool> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# Introduction | ||
|
||
The packages `HeterogeneousTest/CUDADevice`, `HeterogeneousTest/CUDAKernel`, | ||
`HeterogeneousTest/CUDAWrapper` and `HeterogeneousTest/CUDAOpaque` implement a set of libraries, | ||
plugins and tests to exercise the build rules for CUDA. | ||
In particular, these tests show what is supported and what are the limitations implementing | ||
CUDA-based libraries, and using them from multiple plugins. | ||
|
||
|
||
# `HeterogeneousTest/CUDADevice` | ||
|
||
The package `HeterogeneousTest/CUDADevice` implements a library that defines and exports CUDA | ||
device-side functions: | ||
```c++ | ||
namespace cms::cudatest { | ||
|
||
__device__ void add_vectors_f(...); | ||
__device__ void add_vectors_d(...); | ||
|
||
} // namespace cms::cudatest | ||
``` | ||
The `plugins` directory implements the `CUDATestDeviceAdditionModule` `EDAnalyzer` that launches a | ||
CUDA kernel using the functions defined in ths library. As a byproduct this plugin also shows how | ||
to split an `EDAnalyzer` or other framework plugin into a host-only part (in a `.cc` file) and a | ||
device part (in a `.cu` file). | ||
The `test` directory implements the `testCudaDeviceAddition` binary that launches a CUDA kernel | ||
using these functions. | ||
It also contains the `testCUDATestDeviceAdditionModule.py` python configuration to exercise the | ||
`CUDATestDeviceAdditionModule` plugin. | ||
# Other packages | ||
For various ways in which this library and plugin can be tested, see also the other | ||
`HeterogeneousTest/CUDA...` packages: | ||
- [`HeterogeneousTest/CUDAKernel/README.md`](../../HeterogeneousTest/CUDAKernel/README.md) | ||
- [`HeterogeneousTest/CUDAWrapper/README.md`](../../HeterogeneousTest/CUDAWrapper/README.md) | ||
- [`HeterogeneousTest/CUDAOpaque/README.md`](../../HeterogeneousTest/CUDAOpaque/README.md) | ||
# Combining plugins | ||
`HeterogeneousTest/CUDAOpaque/test` contains the `testCUDATestAdditionModules.py` python | ||
configuration that tries to exercise all four plugins in a single application. | ||
Unfortunately, the CUDA kernels used in the `CUDATestDeviceAdditionModule` plugin and those used in | ||
the `HeterogeneousTest/CUDAKernel` library run into some kind of conflict, leading to the error | ||
``` | ||
HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionAlgo.cu, line 17: | ||
cudaCheck(cudaGetLastError()); | ||
cudaErrorInvalidDeviceFunction: invalid device function | ||
``` | ||
Using together the other three plugins does work correctly. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#ifndef HeterogeneousTest_CUDADevice_interface_DeviceAddition_h | ||
#define HeterogeneousTest_CUDADevice_interface_DeviceAddition_h | ||
|
||
#include <cstddef> | ||
|
||
#include <cuda_runtime.h> | ||
|
||
namespace cms::cudatest { | ||
|
||
__device__ void add_vectors_f(const float* __restrict__ in1, | ||
const float* __restrict__ in2, | ||
float* __restrict__ out, | ||
size_t size); | ||
|
||
__device__ void add_vectors_d(const double* __restrict__ in1, | ||
const double* __restrict__ in2, | ||
double* __restrict__ out, | ||
size_t size); | ||
|
||
} // namespace cms::cudatest | ||
|
||
#endif // HeterogeneousTest_CUDADevice_interface_DeviceAddition_h |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
<iftool name="cuda-gcc-support"> | ||
<library file="*.cc *.cu" name="HeterogeneousTestCUDADevicePlugins"> | ||
<use name="cuda"/> | ||
<use name="FWCore/Framework"/> | ||
<use name="FWCore/ParameterSet"/> | ||
<use name="FWCore/ServiceRegistry"/> | ||
<use name="HeterogeneousCore/CUDAServices"/> | ||
<use name="HeterogeneousCore/CUDAUtilities"/> | ||
<use name="HeterogeneousTest/CUDADevice"/> | ||
<flags EDM_PLUGIN="1"/> | ||
</library> | ||
</iftool> |
27 changes: 27 additions & 0 deletions
27
HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionAlgo.cu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#include <cstddef> | ||
|
||
#include <cuda_runtime.h> | ||
|
||
#include "HeterogeneousTest/CUDADevice/interface/DeviceAddition.h" | ||
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" | ||
|
||
#include "CUDATestDeviceAdditionAlgo.h" | ||
|
||
namespace HeterogeneousCoreCUDATestDevicePlugins { | ||
|
||
__global__ void kernel_add_vectors_f(const float* __restrict__ in1, | ||
const float* __restrict__ in2, | ||
float* __restrict__ out, | ||
size_t size) { | ||
cms::cudatest::add_vectors_f(in1, in2, out, size); | ||
} | ||
|
||
void wrapper_add_vectors_f(const float* __restrict__ in1, | ||
const float* __restrict__ in2, | ||
float* __restrict__ out, | ||
size_t size) { | ||
kernel_add_vectors_f<<<32, 32>>>(in1, in2, out, size); | ||
cudaCheck(cudaGetLastError()); | ||
} | ||
|
||
} // namespace HeterogeneousCoreCUDATestDevicePlugins |
15 changes: 15 additions & 0 deletions
15
HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionAlgo.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#ifndef HeterogeneousTest_CUDADevice_plugins_CUDATestDeviceAdditionAlgo_h | ||
#define HeterogeneousTest_CUDADevice_plugins_CUDATestDeviceAdditionAlgo_h | ||
|
||
#include <cstddef> | ||
|
||
namespace HeterogeneousCoreCUDATestDevicePlugins { | ||
|
||
void wrapper_add_vectors_f(const float* __restrict__ in1, | ||
const float* __restrict__ in2, | ||
float* __restrict__ out, | ||
size_t size); | ||
|
||
} // namespace HeterogeneousCoreCUDATestDevicePlugins | ||
|
||
#endif // HeterogeneousTest_CUDADevice_plugins_CUDATestDeviceAdditionAlgo_h |
106 changes: 106 additions & 0 deletions
106
HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionModule.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
#include <cstddef> | ||
#include <cstdint> | ||
#include <iostream> | ||
#include <random> | ||
#include <vector> | ||
|
||
#include <cuda_runtime.h> | ||
|
||
#include "FWCore/Framework/interface/Event.h" | ||
#include "FWCore/Framework/interface/Frameworkfwd.h" | ||
#include "FWCore/Framework/interface/global/EDAnalyzer.h" | ||
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" | ||
#include "FWCore/ParameterSet/interface/ParameterSet.h" | ||
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" | ||
#include "FWCore/ServiceRegistry/interface/Service.h" | ||
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" | ||
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" | ||
|
||
#include "CUDATestDeviceAdditionAlgo.h" | ||
|
||
class CUDATestDeviceAdditionModule : public edm::global::EDAnalyzer<> { | ||
public: | ||
explicit CUDATestDeviceAdditionModule(edm::ParameterSet const& config); | ||
~CUDATestDeviceAdditionModule() override = default; | ||
|
||
static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); | ||
|
||
void analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const override; | ||
|
||
private: | ||
const uint32_t size_; | ||
}; | ||
|
||
CUDATestDeviceAdditionModule::CUDATestDeviceAdditionModule(edm::ParameterSet const& config) | ||
: size_(config.getParameter<uint32_t>("size")) {} | ||
|
||
void CUDATestDeviceAdditionModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { | ||
edm::ParameterSetDescription desc; | ||
desc.add<uint32_t>("size", 1024 * 1024); | ||
descriptions.addWithDefaultLabel(desc); | ||
} | ||
|
||
void CUDATestDeviceAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const { | ||
// require CUDA for running | ||
edm::Service<CUDAService> cs; | ||
if (not cs->enabled()) { | ||
std::cout << "The CUDAService is disabled, the test will be skipped.\n"; | ||
return; | ||
} | ||
|
||
// random number generator with a gaussian distribution | ||
std::random_device rd{}; | ||
std::default_random_engine rand{rd()}; | ||
std::normal_distribution<float> dist{0., 1.}; | ||
|
||
// tolerance | ||
constexpr float epsilon = 0.000001; | ||
|
||
// allocate input and output host buffers | ||
std::vector<float> in1_h(size_); | ||
std::vector<float> in2_h(size_); | ||
std::vector<float> out_h(size_); | ||
|
||
// fill the input buffers with random data, and the output buffer with zeros | ||
for (size_t i = 0; i < size_; ++i) { | ||
in1_h[i] = dist(rand); | ||
in2_h[i] = dist(rand); | ||
out_h[i] = 0.; | ||
} | ||
|
||
// allocate input and output buffers on the device | ||
float* in1_d; | ||
float* in2_d; | ||
float* out_d; | ||
cudaCheck(cudaMalloc(&in1_d, size_ * sizeof(float))); | ||
cudaCheck(cudaMalloc(&in2_d, size_ * sizeof(float))); | ||
cudaCheck(cudaMalloc(&out_d, size_ * sizeof(float))); | ||
|
||
// copy the input data to the device | ||
cudaCheck(cudaMemcpy(in1_d, in1_h.data(), size_ * sizeof(float), cudaMemcpyHostToDevice)); | ||
cudaCheck(cudaMemcpy(in2_d, in2_h.data(), size_ * sizeof(float), cudaMemcpyHostToDevice)); | ||
|
||
// fill the output buffer with zeros | ||
cudaCheck(cudaMemset(out_d, 0, size_ * sizeof(float))); | ||
|
||
// launch the 1-dimensional kernel for vector addition | ||
HeterogeneousCoreCUDATestDevicePlugins::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_); | ||
|
||
// copy the results from the device to the host | ||
cudaCheck(cudaMemcpy(out_h.data(), out_d, size_ * sizeof(float), cudaMemcpyDeviceToHost)); | ||
|
||
// wait for all the operations to complete | ||
cudaCheck(cudaDeviceSynchronize()); | ||
|
||
// check the results | ||
for (size_t i = 0; i < size_; ++i) { | ||
float sum = in1_h[i] + in2_h[i]; | ||
assert(out_h[i] < sum + epsilon); | ||
assert(out_h[i] > sum - epsilon); | ||
} | ||
|
||
std::cout << "All tests passed.\n"; | ||
} | ||
|
||
#include "FWCore/Framework/interface/MakerMacros.h" | ||
DEFINE_FWK_MODULE(CUDATestDeviceAdditionModule); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#include <cstddef> | ||
#include <cstdint> | ||
|
||
#include <cuda_runtime.h> | ||
|
||
#include "HeterogeneousTest/CUDADevice/interface/DeviceAddition.h" | ||
|
||
namespace cms::cudatest { | ||
|
||
__device__ void add_vectors_f(const float* __restrict__ in1, | ||
const float* __restrict__ in2, | ||
float* __restrict__ out, | ||
size_t size) { | ||
uint32_t thread = threadIdx.x + blockIdx.x * blockDim.x; | ||
uint32_t stride = blockDim.x * gridDim.x; | ||
|
||
for (size_t i = thread; i < size; i += stride) { | ||
out[i] = in1[i] + in2[i]; | ||
} | ||
} | ||
|
||
__device__ void add_vectors_d(const double* __restrict__ in1, | ||
const double* __restrict__ in2, | ||
double* __restrict__ out, | ||
size_t size) { | ||
uint32_t thread = threadIdx.x + blockIdx.x * blockDim.x; | ||
uint32_t stride = blockDim.x * gridDim.x; | ||
|
||
for (size_t i = thread; i < size; i += stride) { | ||
out[i] = in1[i] + in2[i]; | ||
} | ||
} | ||
|
||
} // namespace cms::cudatest |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<iftool name="cuda-gcc-support"> | ||
<bin file="testDeviceAddition.cu" name="testCudaDeviceAddition"> | ||
<use name="catch2"/> | ||
<use name="cuda"/> | ||
<use name="HeterogeneousTest/CUDADevice"/> | ||
<use name="HeterogeneousCore/CUDAUtilities"/> | ||
</bin> | ||
|
||
<test name="testCUDATestDeviceAdditionModule" command="cmsRun ${LOCALTOP}/src/HeterogeneousTest/CUDADevice/test/testCUDATestDeviceAdditionModule.py"/> | ||
</iftool> |
15 changes: 15 additions & 0 deletions
15
HeterogeneousTest/CUDADevice/test/testCUDATestDeviceAdditionModule.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import FWCore.ParameterSet.Config as cms | ||
|
||
process = cms.Process('TestCUDATestDeviceAdditionModule') | ||
|
||
process.source = cms.Source('EmptySource') | ||
|
||
process.CUDAService = cms.Service('CUDAService') | ||
|
||
process.cudaTestDeviceAdditionModule = cms.EDAnalyzer('CUDATestDeviceAdditionModule', | ||
size = cms.uint32( 1024*1024 ) | ||
) | ||
|
||
process.path = cms.Path(process.cudaTestDeviceAdditionModule) | ||
|
||
process.maxEvents.input = 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#include <cstddef> | ||
#include <cstdint> | ||
#include <random> | ||
#include <vector> | ||
|
||
#define CATCH_CONFIG_MAIN | ||
#include <catch.hpp> | ||
|
||
#include <cuda_runtime.h> | ||
|
||
#include "HeterogeneousTest/CUDADevice/interface/DeviceAddition.h" | ||
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" | ||
#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" | ||
|
||
__global__ void kernel_add_vectors_f(const float* __restrict__ in1, | ||
const float* __restrict__ in2, | ||
float* __restrict__ out, | ||
size_t size) { | ||
cms::cudatest::add_vectors_f(in1, in2, out, size); | ||
} | ||
|
||
TEST_CASE("HeterogeneousTest/CUDADevice test", "[cudaTestDeviceAddition]") { | ||
cms::cudatest::requireDevices(); | ||
|
||
// random number generator with a gaussian distribution | ||
std::random_device rd{}; | ||
std::default_random_engine rand{rd()}; | ||
std::normal_distribution<float> dist{0., 1.}; | ||
|
||
// tolerance | ||
constexpr float epsilon = 0.000001; | ||
|
||
// buffer size | ||
constexpr size_t size = 1024 * 1024; | ||
|
||
// allocate input and output host buffers | ||
std::vector<float> in1_h(size); | ||
std::vector<float> in2_h(size); | ||
std::vector<float> out_h(size); | ||
|
||
// fill the input buffers with random data, and the output buffer with zeros | ||
for (size_t i = 0; i < size; ++i) { | ||
in1_h[i] = dist(rand); | ||
in2_h[i] = dist(rand); | ||
out_h[i] = 0.; | ||
} | ||
|
||
SECTION("Test add_vectors_f") { | ||
// allocate input and output buffers on the device | ||
float* in1_d; | ||
float* in2_d; | ||
float* out_d; | ||
REQUIRE_NOTHROW(cudaCheck(cudaMalloc(&in1_d, size * sizeof(float)))); | ||
REQUIRE_NOTHROW(cudaCheck(cudaMalloc(&in2_d, size * sizeof(float)))); | ||
REQUIRE_NOTHROW(cudaCheck(cudaMalloc(&out_d, size * sizeof(float)))); | ||
|
||
// copy the input data to the device | ||
REQUIRE_NOTHROW(cudaCheck(cudaMemcpy(in1_d, in1_h.data(), size * sizeof(float), cudaMemcpyHostToDevice))); | ||
REQUIRE_NOTHROW(cudaCheck(cudaMemcpy(in2_d, in2_h.data(), size * sizeof(float), cudaMemcpyHostToDevice))); | ||
|
||
// fill the output buffer with zeros | ||
REQUIRE_NOTHROW(cudaCheck(cudaMemset(out_d, 0, size * sizeof(float)))); | ||
|
||
// launch the 1-dimensional kernel for vector addition | ||
kernel_add_vectors_f<<<32, 32>>>(in1_d, in2_d, out_d, size); | ||
REQUIRE_NOTHROW(cudaCheck(cudaGetLastError())); | ||
|
||
// copy the results from the device to the host | ||
REQUIRE_NOTHROW(cudaCheck(cudaMemcpy(out_h.data(), out_d, size * sizeof(float), cudaMemcpyDeviceToHost))); | ||
|
||
// wait for all the operations to complete | ||
REQUIRE_NOTHROW(cudaCheck(cudaDeviceSynchronize())); | ||
|
||
// check the results | ||
for (size_t i = 0; i < size; ++i) { | ||
float sum = in1_h[i] + in2_h[i]; | ||
CHECK_THAT(out_h[i], Catch::Matchers::WithinAbs(sum, epsilon)); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
<iftool name="cuda-gcc-support"> | ||
<use name="cuda"/> | ||
<use name="HeterogeneousTest/CUDADevice"/> | ||
<export> | ||
<lib name="1"/> | ||
</export> | ||
</iftool> |
Oops, something went wrong.