Skip to content

Commit

Permalink
Merge pull request #1269 from LLNL/feature/yang39/array-push-back
Browse files Browse the repository at this point in the history
Array: enable support for some device-side insertions
  • Loading branch information
publixsubfan committed Feb 22, 2024
2 parents 0a47cb2 + 60f7683 commit 60a3add
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 6 deletions.
1 change: 1 addition & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ The Axom project release numbers follow [Semantic Versioning](http://semver.org/
negative, resulting in the signed volume becoming positive.
- Adds `FlatMap`, a generic key-value store which aims for drop-in compatibility
with `std::unordered_map`, but utilizes an open-addressing design.
- Adds support for device-side use of `Array::push_back()` and `Array::emplace_back()`.

### Changed
- `DistributedClosestPoint` outputs are now controlled by the `setOutput` method.
Expand Down
68 changes: 62 additions & 6 deletions src/axom/core/Array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -595,21 +595,27 @@ class Array : public ArrayBase<T, DIM, Array<T, DIM, SPACE>>
* \param [in] value the value to be added to the back.
*
* \note Reallocation is done if the new size will exceed the capacity.
* \note If used in a device kernel, the number of push_backs must not exceed
* the capacity, since device-side reallocations aren't supported.
* \note Array must be allocated in unified memory if calling on the device.
*
* \pre DIM == 1
*/
void push_back(const T& value);
AXOM_HOST_DEVICE void push_back(const T& value);

/*!
* \brief Move a value to the back of the array.
*
* \param [in] value the value to move to the back.
*
* \note On the host, reallocation is done if the new size will exceed the
* capacity.
* \note If used in a device kernel, the number of push_backs must not exceed
* the capacity, since device-side reallocations aren't supported.
* \note Array must be allocated in unified memory if calling on the device.
*
* \pre DIM == 1
*/
void push_back(T&& value);
AXOM_HOST_DEVICE void push_back(T&& value);

/*!
* \brief Inserts new element at the end of the Array.
Expand All @@ -618,11 +624,14 @@ class Array : public ArrayBase<T, DIM, Array<T, DIM, SPACE>>
*
* \note Reallocation is done if the new size will exceed the capacity.
* \note The size increases by 1.
* \note If used in a device kernel, the total number of elements after all
* insertions must not exceed the capacity, since device-side reallocations
* aren't supported.
* \note Array must be allocated in unified memory if calling on the device.
*
* \pre DIM == 1
*/
template <typename... Args>
void emplace_back(Args&&... args);
AXOM_HOST_DEVICE void emplace_back(Args&&... args);

/// @}

Expand Down Expand Up @@ -849,6 +858,16 @@ class Array : public ArrayBase<T, DIM, Array<T, DIM, SPACE>>
*/
T* reserveForInsert(IndexType n, IndexType pos);

/*!
* \brief Make space for a subsequent insertion into the array.
*
* \param [in] n the number of elements to insert.
*
* \return The index into the underlying buffer at which the caller should
* construct the first inserted element.
*
* \note This version supports concurrent GPU insertions.
* \note Reallocation is not supported. On CUDA and HIP builds the kernel is
* terminated if the reserved position lies beyond the capacity.
*/
AXOM_DEVICE IndexType reserveForDeviceInsert(IndexType n);

/*!
* \brief Update the number of elements.
*
Expand Down Expand Up @@ -1309,15 +1328,15 @@ inline typename Array<T, DIM, SPACE>::ArrayIterator Array<T, DIM, SPACE>::emplac

//------------------------------------------------------------------------------
// Copy-inserting push_back: a thin wrapper that forwards the value to
// emplace_back(), which selects the host or device insertion path.
template <typename T, int DIM, MemorySpace SPACE>
inline void Array<T, DIM, SPACE>::push_back(const T& value)
AXOM_HOST_DEVICE inline void Array<T, DIM, SPACE>::push_back(const T& value)
{
// Appending to the back is only meaningful for one-dimensional arrays.
static_assert(DIM == 1, "push_back is only supported for 1D arrays");
emplace_back(value);
}

//------------------------------------------------------------------------------
template <typename T, int DIM, MemorySpace SPACE>
inline void Array<T, DIM, SPACE>::push_back(T&& value)
AXOM_HOST_DEVICE inline void Array<T, DIM, SPACE>::push_back(T&& value)
{
static_assert(DIM == 1, "push_back is only supported for 1D arrays");
emplace_back(std::move(value));
Expand All @@ -1326,10 +1345,16 @@ inline void Array<T, DIM, SPACE>::push_back(T&& value)
//------------------------------------------------------------------------------
// Constructs a new element at the end of the array from the given arguments.
// The implementation differs between host and device compilation passes.
template <typename T, int DIM, MemorySpace SPACE>
template <typename... Args>
inline void Array<T, DIM, SPACE>::emplace_back(Args&&... args)
AXOM_HOST_DEVICE inline void Array<T, DIM, SPACE>::emplace_back(Args&&... args)
{
static_assert(DIM == 1, "emplace_back is only supported for 1D arrays");
#ifdef AXOM_DEVICE_CODE
// Device pass: reserve exactly one slot through reserveForDeviceInsert(),
// which does not reallocate, and use the returned index as the destination.
IndexType insertIndex = reserveForDeviceInsert(1);
// Construct in-place in uninitialized memory.
new(m_data + insertIndex) T(std::forward<Args>(args)...);
#else
// Host pass: delegate to emplace() at index size(), i.e. the current end.
emplace(size(), std::forward<Args>(args)...);
#endif
}

//------------------------------------------------------------------------------
Expand Down Expand Up @@ -1484,6 +1509,37 @@ inline T* Array<T, DIM, SPACE>::reserveForInsert(IndexType n, IndexType pos)
return m_data + pos;
}

//------------------------------------------------------------------------------
/*
 * Reserves room for n elements at the end of the array from device code and
 * returns the index of the first reserved slot.
 *
 * The element count is bumped with an atomic add so that concurrent device
 * threads each receive a distinct range. The underlying buffer is never
 * reallocated: if the reserved range [new_pos, new_pos + n) does not fit in
 * the current capacity, the kernel is terminated on CUDA/HIP builds.
 */
template <typename T, int DIM, MemorySpace SPACE>
AXOM_DEVICE inline IndexType Array<T, DIM, SPACE>::reserveForDeviceInsert(IndexType n)
{
#ifndef AXOM_DEVICE_CODE
  // Host path: should never be called.
  AXOM_UNUSED_VAR(n);
  assert(false);
  return {};
#else
  // Device path: supports insertion while the reserved range fits within
  // m_capacity. Does not support insertions which require reallocating the
  // underlying buffer.
  const IndexType new_pos =
    RAJA::atomicAdd<RAJA::auto_atomic>(&m_num_elements, n);

  // Check the END of the reserved range, not just its start: for n > 1 a
  // range can begin inside the buffer and still run past the capacity.
  // For n == 1 this is equivalent to new_pos >= m_capacity.
  if(new_pos + n > m_capacity)
  {
#ifdef AXOM_DEBUG
    printf(
      "Array::reserveForDeviceInsert: size() exceeded capacity() when "
      "inserting on the device.\n");
#endif
#ifdef AXOM_USE_CUDA
    __trap();
#elif defined(AXOM_USE_HIP)
    abort();
#endif
  }
  return new_pos;
#endif
}

//------------------------------------------------------------------------------
template <typename T, int DIM, MemorySpace SPACE>
inline void Array<T, DIM, SPACE>::updateNumElements(IndexType new_num_elements)
Expand Down
88 changes: 88 additions & 0 deletions src/axom/core/tests/core_array_for_all.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1177,4 +1177,92 @@ AXOM_TYPED_TEST(core_array_for_all, nontrivial_emplace)
}
}

//------------------------------------------------------------------------------
// Tags recorded by DeviceInsert to identify which compilation pass
// (host or device) ran its constructor.
constexpr int INSERT_ON_HOST = 1;
constexpr int INSERT_ON_DEVICE = 2;

/*!
 * \brief Test helper that stores a value together with a tag recording
 *  whether it was constructed in host code or in device code.
 */
struct DeviceInsert
{
  AXOM_HOST_DEVICE DeviceInsert(int value)
    : m_value(value)
#ifdef AXOM_DEVICE_CODE
    , m_host_or_device(INSERT_ON_DEVICE)
#else
    , m_host_or_device(INSERT_ON_HOST)
#endif
  { }

  int m_value;           // payload passed to the constructor
  int m_host_or_device;  // INSERT_ON_HOST or INSERT_ON_DEVICE
};

// Checks that emplace_back() called from inside a for_all kernel appends N
// elements into a preallocated array without changing its capacity, and that
// every element was constructed on the expected side (host or device).
AXOM_TYPED_TEST(core_array_for_all, device_insert)
{
using ExecSpace = typename TestFixture::ExecSpace;
using DynamicArray = typename TestFixture::template DynamicTArray<DeviceInsert>;
using DynamicArrayOfArrays =
typename TestFixture::template DynamicTArray<DynamicArray>;

int kernelAllocID = axom::execution_space<ExecSpace>::allocatorID();

constexpr axom::IndexType N = 374;

// The array under test lives inside an outer one-element array; the kernel
// reaches it through a view of that outer array (arr_v[0]).
DynamicArrayOfArrays arr_container(1, 1, kernelAllocID);
arr_container[0] = DynamicArray(0, N, kernelAllocID);
const auto arr_v = arr_container.view();

// Starts empty, with room for exactly N insertions.
EXPECT_EQ(arr_container[0].size(), 0);
EXPECT_EQ(arr_container[0].capacity(), N);

axom::for_all<ExecSpace>(
N,
AXOM_LAMBDA(axom::IndexType idx) {
// On host builds with OpenMP, insertions made from inside a parallel region
// are serialized with a critical section.
// NOTE(review): presumably because the host insertion path is not
// thread-safe -- confirm.
#if defined(AXOM_USE_OPENMP) && defined(AXOM_USE_RAJA) && \
!defined(AXOM_DEVICE_CODE)
if(omp_in_parallel())
{
#pragma omp critical
{
arr_v[0].emplace_back(3 * idx + 5);
}
}
else
{
arr_v[0].emplace_back(3 * idx + 5);
}
#else
arr_v[0].emplace_back(3 * idx + 5);
#endif
});

// handles synchronization, if necessary
if(axom::execution_space<ExecSpace>::async())
{
axom::synchronize<ExecSpace>();
}

// All N insertions must have landed, and no reallocation may have occurred.
EXPECT_EQ(arr_container[0].size(), N);
EXPECT_EQ(arr_container[0].capacity(), N);

// Device-side inserts may occur in any order.
// Sort them before we check the inserted values.
std::sort(arr_container[0].begin(),
arr_container[0].end(),
[](const DeviceInsert& a, const DeviceInsert& b) -> bool {
return a.m_value < b.m_value;
});

// After sorting, element i must hold the value inserted for idx == i, and
// its tag must match the side on which the execution space runs.
for(int i = 0; i < N; i++)
{
EXPECT_EQ(arr_container[0][i].m_value, 3 * i + 5);
if(axom::execution_space<ExecSpace>::onDevice())
{
EXPECT_EQ(arr_container[0][i].m_host_or_device, INSERT_ON_DEVICE);
}
else
{
EXPECT_EQ(arr_container[0][i].m_host_or_device, INSERT_ON_HOST);
}
}
}

} // end namespace testing

0 comments on commit 60a3add

Please sign in to comment.