From 43491cb5635a4da213a4d3151ed0a309bc1ced03 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 24 Jan 2023 18:32:13 +0100 Subject: [PATCH] Add a test for CUDA library build rules --- .../CUDATestDeviceLib/BuildFile.xml | 4 + .../interface/DeviceAddition.h | 12 +++ .../CUDATestDeviceLib/src/DeviceAddition.cu | 25 ++++++ .../CUDATestDeviceLib/test/BuildFile.xml | 8 ++ .../test/testDeviceAddition.cu | 89 +++++++++++++++++++ 5 files changed, 138 insertions(+) create mode 100644 HeterogeneousCore/CUDATestDeviceLib/BuildFile.xml create mode 100644 HeterogeneousCore/CUDATestDeviceLib/interface/DeviceAddition.h create mode 100644 HeterogeneousCore/CUDATestDeviceLib/src/DeviceAddition.cu create mode 100644 HeterogeneousCore/CUDATestDeviceLib/test/BuildFile.xml create mode 100644 HeterogeneousCore/CUDATestDeviceLib/test/testDeviceAddition.cu diff --git a/HeterogeneousCore/CUDATestDeviceLib/BuildFile.xml b/HeterogeneousCore/CUDATestDeviceLib/BuildFile.xml new file mode 100644 index 0000000000000..fbb0f89e301db --- /dev/null +++ b/HeterogeneousCore/CUDATestDeviceLib/BuildFile.xml @@ -0,0 +1,4 @@ + + + + diff --git a/HeterogeneousCore/CUDATestDeviceLib/interface/DeviceAddition.h b/HeterogeneousCore/CUDATestDeviceLib/interface/DeviceAddition.h new file mode 100644 index 0000000000000..2fcd7b2fa93cc --- /dev/null +++ b/HeterogeneousCore/CUDATestDeviceLib/interface/DeviceAddition.h @@ -0,0 +1,12 @@ +#ifndef HeterogeneousCore_CUDATestDeviceLib_interface_DeviceAddition_h +#define HeterogeneousCore_CUDATestDeviceLib_interface_DeviceAddition_h + +#include + +__device__ +void add_vectors_f(const float* __restrict__ in1, const float* __restrict__ in2, float* __restrict__ out, size_t size); + +__device__ +void add_vectors_d(const double* __restrict__ in1, const double* __restrict__ in2, double* __restrict__ out, size_t size); + +#endif // HeterogeneousCore_CUDATestDeviceLib_interface_DeviceAddition_h diff --git a/HeterogeneousCore/CUDATestDeviceLib/src/DeviceAddition.cu b/HeterogeneousCore/CUDATestDeviceLib/src/DeviceAddition.cu new file mode 100644 index 0000000000000..009f8bceed3c2 --- /dev/null +++ b/HeterogeneousCore/CUDATestDeviceLib/src/DeviceAddition.cu @@ -0,0 +1,25 @@ +#include + +#include + +#include "HeterogeneousCore/CUDATestDeviceLib/interface/DeviceAddition.h" + +__device__ +void add_vectors_f(const float* __restrict__ in1, const float* __restrict__ in2, float* __restrict__ out, size_t size) { + uint32_t thread = threadIdx.x + blockIdx.x * blockDim.x; + uint32_t stride = blockDim.x * gridDim.x; + + for (size_t i = thread; i < size; i += stride) { + out[i] = in1[i] + in2[i]; + } +} + +__device__ +void add_vectors_d(const double* __restrict__ in1, const double* __restrict__ in2, double* __restrict__ out, size_t size) { + uint32_t thread = threadIdx.x + blockIdx.x * blockDim.x; + uint32_t stride = blockDim.x * gridDim.x; + + for (size_t i = thread; i < size; i += stride) { + out[i] = in1[i] + in2[i]; + } +} diff --git a/HeterogeneousCore/CUDATestDeviceLib/test/BuildFile.xml b/HeterogeneousCore/CUDATestDeviceLib/test/BuildFile.xml new file mode 100644 index 0000000000000..c76079c44be9d --- /dev/null +++ b/HeterogeneousCore/CUDATestDeviceLib/test/BuildFile.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/HeterogeneousCore/CUDATestDeviceLib/test/testDeviceAddition.cu b/HeterogeneousCore/CUDATestDeviceLib/test/testDeviceAddition.cu new file mode 100644 index 0000000000000..b6728f775652f --- /dev/null +++ b/HeterogeneousCore/CUDATestDeviceLib/test/testDeviceAddition.cu @@ -0,0 +1,89 @@ +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include + +#include "HeterogeneousCore/CUDATestDeviceLib/interface/DeviceAddition.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +/* +__device__ +void add_vectors_f(const float* __restrict__ in1, const float* __restrict__ in2, float* __restrict__ out, size_t size) { + uint32_t thread = threadIdx.x + blockIdx.x * blockDim.x; + uint32_t stride = blockDim.x * gridDim.x; + + for (size_t i = thread; i < size; i += stride) { + out[i] = in1[i] + in2[i]; + } +} +*/ + +__global__ +void kernel_add_vectors_f(const float* __restrict__ in1, const float* __restrict__ in2, float* __restrict__ out, size_t size) { + add_vectors_f(in1, in2, out, size); +} + + +TEST_CASE("HeterogeneousCore/CUDATestDeviceLib test", "[cudaTestDeviceAddition]") { + // random number generator with a gaussian distribution + std::random_device rd{}; + std::default_random_engine rand{rd()}; + std::normal_distribution dist{0., 1.}; + + // tolerance + constexpr float epsilon = 0.000001; + + // buffer size + constexpr size_t size = 1024 * 1024; + + // allocate input and output host buffers + std::vector in1_h(size); + std::vector in2_h(size); + std::vector out_h(size); + + // fill the input buffers with random data, and the output buffer with zeros + for (size_t i = 0; i < size; ++i) { + in1_h[i] = dist(rand); + in2_h[i] = dist(rand); + out_h[i] = 0.; + } + + SECTION("Test add_vectors_f") { + // allocate input and output buffers on the device + float* in1_d; + float* in2_d; + float* out_d; + cudaCheck(cudaMalloc(&in1_d, size * sizeof(float))); + cudaCheck(cudaMalloc(&in2_d, size * sizeof(float))); + cudaCheck(cudaMalloc(&out_d, size * sizeof(float))); + + // copy the input data to the device + cudaCheck(cudaMemcpy(in1_d, in1_h.data(), size * sizeof(float), cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpy(in2_d, in2_h.data(), size * sizeof(float), cudaMemcpyHostToDevice)); + + // fill the output buffer with zeros + cudaCheck(cudaMemset(out_d, 0, size * sizeof(float))); + + // launch the 1-dimensional kernel for vector addition + kernel_add_vectors_f<<<32, 32>>>(in1_d, in2_d, out_d, size); + cudaCheck(cudaGetLastError()); + + // copy the results from the device to the host + cudaCheck(cudaMemcpy(out_h.data(), out_d, size * sizeof(float), cudaMemcpyDeviceToHost)); + + // wait for all the operations to complete + cudaCheck(cudaDeviceSynchronize()); + + // check the results + for (size_t i = 0; i < size; ++i) { + float sum = in1_h[i] + in2_h[i]; + REQUIRE(out_h[i] < sum + epsilon); + REQUIRE(out_h[i] > sum - epsilon); + } + } + +}