Skip to content

Commit

Permalink
Merge pull request #1295 from LLNL/feature/han12/BVH_silo_hip_changes
Browse files Browse the repository at this point in the history
HIP intrinsics, candidate examples init change
  • Loading branch information
bmhan12 committed Mar 26, 2024
2 parents 03cbb62 + 40e50bb commit 5743116
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 94 deletions.
14 changes: 13 additions & 1 deletion src/axom/core/utilities/BitUtilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,16 @@
#include "axom/core/Types.hpp"

// CUDA intrinsics: https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html
// TODO: Support HIP intrinsics (https://rocm.docs.amd.com/projects/HIP/en/latest/reference/kernel_language.html)
// HIP intrinsics: https://rocm.docs.amd.com/projects/HIP/en/latest/reference/kernel_language.html

// Check for and setup defines for platform-specific intrinsics
// Note: `__GNUC__` is defined for the gnu, clang and intel compilers
#if defined(AXOM_USE_CUDA)
// Intrinsics included implicitly

#elif defined(AXOM_USE_HIP)
#include <hip/hip_runtime.h>

#elif defined(_WIN64) && (_MSC_VER >= 1600)
#define _AXOM_CORE_USE_INTRINSICS_MSVC
#include <intrin.h>
Expand Down Expand Up @@ -89,6 +93,8 @@ AXOM_HOST_DEVICE inline int countr_zero(std::uint64_t word) noexcept
/* clang-format off */
#if defined(__CUDA_ARCH__) && defined(AXOM_USE_CUDA)
return word != std::uint64_t(0) ? __ffsll(word) - 1 : 64;
#elif defined(__HIP_DEVICE_COMPILE__) && defined(AXOM_USE_HIP)
return word != std::uint64_t(0) ? __ffsll(static_cast<unsigned long long int>(word)) - 1 : 64;
#elif defined(_AXOM_CORE_USE_INTRINSICS_MSVC)
unsigned long cnt;
return _BitScanForward64(&cnt, word) ? cnt : 64;
Expand Down Expand Up @@ -127,6 +133,9 @@ AXOM_HOST_DEVICE inline int popcount(std::uint64_t word) noexcept
#if defined(__CUDA_ARCH__) && defined(AXOM_USE_CUDA)
// Use CUDA intrinsic for popcount
return __popcll(word);
#elif defined(__HIP_DEVICE_COMPILE__) && defined(AXOM_USE_HIP)
// Use HIP intrinsic for popcount
return __popcll(word);
#elif defined(_AXOM_CORE_USE_INTRINSICS_MSVC)
return __popcnt64(word);
#elif defined(_AXOM_CORE_USE_INTRINSICS_GCC) || defined(_AXOM_CORE_USE_INTRINSICS_PPC)
Expand Down Expand Up @@ -166,6 +175,9 @@ AXOM_HOST_DEVICE inline std::int32_t countl_zero(std::int32_t word) noexcept
#if defined(__CUDA_ARCH__) && defined(AXOM_USE_CUDA)
// Use CUDA intrinsic for count leading zeros
return __clz(word);
#elif defined(__HIP_DEVICE_COMPILE__) && defined(AXOM_USE_HIP)
// Use HIP intrinsic for count leading zeros
return __clz(word);
#elif defined(_AXOM_CORE_USE_INTRINSICS_MSVC)
unsigned long cnt;
return _BitScanReverse(&cnt, word) ? 31 - cnt : 32;
Expand Down
246 changes: 153 additions & 93 deletions src/axom/quest/examples/quest_candidates_example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,9 +248,21 @@ struct HexMesh
BoundingBox m_meshBoundingBox;
};

template <typename ExecSpace>
HexMesh loadBlueprintHexMesh(const std::string& mesh_path,
bool verboseOutput = false)
{
using HexArray = axom::Array<typename HexMesh::Hexahedron>;
using BBoxArray = axom::Array<typename HexMesh::BoundingBox>;
constexpr bool on_device = axom::execution_space<ExecSpace>::onDevice();

// Get ids of necessary allocators
const int host_allocator =
axom::getUmpireResourceAllocatorID(umpire::resource::Host);
const int kernel_allocator = on_device
? axom::getUmpireResourceAllocatorID(umpire::resource::Device)
: axom::execution_space<ExecSpace>::allocatorID();

HexMesh hexMesh;

// Load Blueprint mesh into Conduit node
Expand Down Expand Up @@ -292,37 +304,83 @@ HexMesh loadBlueprintHexMesh(const std::string& mesh_path,
}

// extract hexes into an axom::Array
int* connectivity = n_load[0]["topologies/topo/elements/connectivity"].value();
auto x_vals_h =
axom::ArrayView<double>(n_load[0]["coordsets/coords/values/x"].value(),
num_nodes);
auto y_vals_h =
axom::ArrayView<double>(n_load[0]["coordsets/coords/values/y"].value(),
num_nodes);
auto z_vals_h =
axom::ArrayView<double>(n_load[0]["coordsets/coords/values/z"].value(),
num_nodes);

// Move xyz values onto device
axom::Array<double> x_vals_d = on_device
? axom::Array<double>(x_vals_h, kernel_allocator)
: axom::Array<double>();
auto x_vals_view = on_device ? x_vals_d.view() : x_vals_h;

axom::Array<double> y_vals_d = on_device
? axom::Array<double>(y_vals_h, kernel_allocator)
: axom::Array<double>();
auto y_vals_view = on_device ? y_vals_d.view() : y_vals_h;

axom::Array<double> z_vals_d = on_device
? axom::Array<double>(z_vals_h, kernel_allocator)
: axom::Array<double>();
auto z_vals_view = on_device ? z_vals_d.view() : z_vals_h;

// Move connectivity information onto device
auto connectivity_h = axom::ArrayView<int>(
n_load[0]["topologies/topo/elements/connectivity"].value(),
connectivity_size);

axom::Array<int> connectivity_d = on_device
? axom::Array<int>(connectivity_h, kernel_allocator)
: axom::Array<int>();
auto connectivity_view = on_device ? connectivity_d.view() : connectivity_h;

// Initialize hex elements and bounding boxes
const int numCells = connectivity_size / HEX_OFFSET;

double* x_vals = n_load[0]["coordsets/coords/values/x"].value();
double* y_vals = n_load[0]["coordsets/coords/values/y"].value();
double* z_vals = n_load[0]["coordsets/coords/values/z"].value();
hexMesh.m_hexes = HexArray(numCells, numCells, kernel_allocator);
auto m_hexes_v = (hexMesh.m_hexes).view();

const int numCells = connectivity_size / HEX_OFFSET;
hexMesh.m_hexes.reserve(numCells);
HexMesh::Hexahedron hex;
axom::Array<HexMesh::Point> hexPoints(HEX_OFFSET);
hexMesh.m_hexBoundingBoxes = BBoxArray(numCells, numCells, kernel_allocator);
auto m_hexBoundingBoxes_v = (hexMesh.m_hexBoundingBoxes).view();

for(int i = 0; i < numCells; ++i)
{
for(int j = 0; j < HEX_OFFSET; j++)
{
int offset = i * HEX_OFFSET;
hexPoints[j] = HexMesh::Point({x_vals[connectivity[offset + j]],
y_vals[connectivity[offset + j]],
z_vals[connectivity[offset + j]]});
}
hex = HexMesh::Hexahedron(hexPoints);
hexMesh.m_hexes.emplace_back(hex);
}
axom::for_all<ExecSpace>(
numCells,
AXOM_LAMBDA(axom::IndexType icell) {
HexMesh::Hexahedron hex;
HexMesh::Point hexPoints[HEX_OFFSET];
for(int j = 0; j < HEX_OFFSET; j++)
{
int offset = icell * HEX_OFFSET;
hexPoints[j] =
HexMesh::Point({x_vals_view[connectivity_view[offset + j]],
y_vals_view[connectivity_view[offset + j]],
z_vals_view[connectivity_view[offset + j]]});
}
hex = HexMesh::Hexahedron(hexPoints[0],
hexPoints[1],
hexPoints[2],
hexPoints[3],
hexPoints[4],
hexPoints[5],
hexPoints[6],
hexPoints[7]);
m_hexes_v[icell] = hex;
m_hexBoundingBoxes_v[icell] = axom::primal::compute_bounding_box(hex);
});

// compute and store hex bounding boxes and mesh bounding box
hexMesh.m_hexBoundingBoxes.reserve(numCells);
for(const auto& hex : hexMesh.hexes())
// Initialize mesh's bounding box on the host
BBoxArray hexBoundingBoxes_h = on_device
? BBoxArray(hexMesh.m_hexBoundingBoxes, host_allocator)
: hexMesh.m_hexBoundingBoxes;
for(const auto& hexbb : hexBoundingBoxes_h)
{
hexMesh.m_hexBoundingBoxes.emplace_back(
axom::primal::compute_bounding_box(hex));
hexMesh.m_meshBoundingBox.addBox(hexMesh.m_hexBoundingBoxes.back());
hexMesh.m_meshBoundingBox.addBox(hexbb);
}

SLIC_INFO(
Expand All @@ -336,21 +394,21 @@ HexMesh loadBlueprintHexMesh(const std::string& mesh_path,
// Append mesh nodes
for(int i = 0; i < num_nodes; i++)
{
mesh->appendNode(x_vals[i], y_vals[i], z_vals[i]);
mesh->appendNode(x_vals_h[i], y_vals_h[i], z_vals_h[i]);
}

// Append mesh cells
for(int i = 0; i < numCells; i++)
{
const axom::IndexType cell[] = {
connectivity[i * HEX_OFFSET],
connectivity[(i * HEX_OFFSET) + 1],
connectivity[(i * HEX_OFFSET) + 2],
connectivity[(i * HEX_OFFSET) + 3],
connectivity[(i * HEX_OFFSET) + 4],
connectivity[(i * HEX_OFFSET) + 5],
connectivity[(i * HEX_OFFSET) + 6],
connectivity[(i * HEX_OFFSET) + 7],
connectivity_h[i * HEX_OFFSET],
connectivity_h[(i * HEX_OFFSET) + 1],
connectivity_h[(i * HEX_OFFSET) + 2],
connectivity_h[(i * HEX_OFFSET) + 3],
connectivity_h[(i * HEX_OFFSET) + 4],
connectivity_h[(i * HEX_OFFSET) + 5],
connectivity_h[(i * HEX_OFFSET) + 6],
connectivity_h[(i * HEX_OFFSET) + 7],
};

mesh->appendCell(cell);
Expand Down Expand Up @@ -382,8 +440,6 @@ axom::Array<IndexPair> findCandidatesBVH(const HexMesh& insertMesh,
SLIC_INFO("Running BVH candidates algorithm in execution Space: "
<< axom::execution_space<ExecSpace>::name());

using HexArray = axom::Array<typename HexMesh::Hexahedron>;
using BBoxArray = axom::Array<typename HexMesh::BoundingBox>;
using IndexArray = axom::Array<axom::IndexType>;
constexpr bool on_device = axom::execution_space<ExecSpace>::onDevice();

Expand All @@ -396,31 +452,9 @@ axom::Array<IndexPair> findCandidatesBVH(const HexMesh& insertMesh,
? axom::getUmpireResourceAllocatorID(umpire::resource::Device)
: axom::execution_space<ExecSpace>::allocatorID();

// Copy the insert-BVH hexes to the device, if necessary
// Either way, insert_hexes_v will be a view w/ data in the correct space
auto& insert_hexes_h = insertMesh.hexes();
HexArray insert_hexes_d =
on_device ? HexArray(insert_hexes_h, kernel_allocator) : HexArray();

// Copy the insert-BVH bboxes to the device, if necessary
// Either way, insert_bbox_v will be a view w/ data in the correct space
auto& insert_bbox_h = insertMesh.hexBoundingBoxes();
BBoxArray insert_bbox_d =
on_device ? BBoxArray(insert_bbox_h, kernel_allocator) : BBoxArray();
auto insert_bbox_v = on_device ? insert_bbox_d.view() : insert_bbox_h.view();

// Copy the query-BVH hexes to the device, if necessary
// Either way, query_hexes_v will be a view w/ data in the correct space
auto& query_hexes_h = queryMesh.hexes();
HexArray query_hexes_d =
on_device ? HexArray(query_hexes_h, kernel_allocator) : HexArray();

// Copy the query-BVH bboxes to the device, if necessary
// Either way, bbox_v will be a view w/ data in the correct space
auto& query_bbox_h = queryMesh.hexBoundingBoxes();
BBoxArray query_bbox_d =
on_device ? BBoxArray(query_bbox_h, kernel_allocator) : BBoxArray();
auto query_bbox_v = on_device ? query_bbox_d.view() : query_bbox_h.view();
// Get the insert and query bounding boxes
auto insert_bbox_v = (insertMesh.hexBoundingBoxes()).view();
auto query_bbox_v = (queryMesh.hexBoundingBoxes()).view();

// Initialize a BVH tree over the insert mesh bounding boxes
axom::spin::BVH<3, ExecSpace, double> bvh;
Expand Down Expand Up @@ -527,8 +561,6 @@ axom::Array<IndexPair> findCandidatesImplicit(const HexMesh& insertMesh,
SLIC_INFO("Running Implicit Grid candidates algorithm in execution Space: "
<< axom::execution_space<ExecSpace>::name());

using HexArray = axom::Array<typename HexMesh::Hexahedron>;
using BBoxArray = axom::Array<typename HexMesh::BoundingBox>;
using IndexArray = axom::Array<int>;
constexpr bool on_device = axom::execution_space<ExecSpace>::onDevice();

Expand All @@ -539,35 +571,13 @@ axom::Array<IndexPair> findCandidatesImplicit(const HexMesh& insertMesh,
? axom::getUmpireResourceAllocatorID(umpire::resource::Device)
: axom::execution_space<ExecSpace>::allocatorID();

// Copy the insert hexes to the device, if necessary
// Either way, insert_hexes_v will be a view w/ data in the correct space
auto& insert_hexes_h = insertMesh.hexes();
HexArray insert_hexes_d =
on_device ? HexArray(insert_hexes_h, kernel_allocator) : HexArray();

// Copy the insert bboxes to the device, if necessary
// Either way, insert_bbox_v will be a view w/ data in the correct space
auto& insert_bbox_h = insertMesh.hexBoundingBoxes();
BBoxArray insert_bbox_d =
on_device ? BBoxArray(insert_bbox_h, kernel_allocator) : BBoxArray();
auto insert_bbox_v = on_device ? insert_bbox_d.view() : insert_bbox_h.view();
// Get the insert and query bounding boxes
auto insert_bbox_v = (insertMesh.hexBoundingBoxes()).view();
auto query_bbox_v = (queryMesh.hexBoundingBoxes()).view();

// Bounding box of entire insert mesh
HexMesh::BoundingBox insert_mesh_bbox_h = insertMesh.meshBoundingBox();

// Copy the query hexes to the device, if necessary
// Either way, query_hexes_v will be a view w/ data in the correct space
auto& query_hexes_h = queryMesh.hexes();
HexArray query_hexes_d =
on_device ? HexArray(query_hexes_h, kernel_allocator) : HexArray();

// Copy the query bboxes to the device, if necessary
// Either way, bbox_v will be a view w/ data in the correct space
auto& query_bbox_h = queryMesh.hexBoundingBoxes();
BBoxArray query_bbox_d =
on_device ? BBoxArray(query_bbox_h, kernel_allocator) : BBoxArray();
auto query_bbox_v = on_device ? query_bbox_d.view() : query_bbox_h.view();

// If given resolution is less than one, use the cube root of the
// number of hexes
if(resolution < 1)
Expand Down Expand Up @@ -749,15 +759,65 @@ int main(int argc, char** argv)
SLIC_INFO(axom::fmt::format("Reading Blueprint file to insert: '{}'...\n",
params.mesh_file_first));

HexMesh insert_mesh =
loadBlueprintHexMesh(params.mesh_file_first, params.isVerbose());
HexMesh insert_mesh;

switch(params.policy)
{
#ifdef AXOM_RUNTIME_POLICY_USE_OPENMP
case RuntimePolicy::omp:
insert_mesh =
loadBlueprintHexMesh<omp_exec>(params.mesh_file_first, params.isVerbose());
break;
#endif
#ifdef AXOM_RUNTIME_POLICY_USE_CUDA
case RuntimePolicy::cuda:
insert_mesh = loadBlueprintHexMesh<cuda_exec>(params.mesh_file_first,
params.isVerbose());
break;
#endif
#ifdef AXOM_RUNTIME_POLICY_USE_HIP
case RuntimePolicy::hip:
insert_mesh =
loadBlueprintHexMesh<hip_exec>(params.mesh_file_first, params.isVerbose());
break;
#endif
default: // RuntimePolicy::seq
insert_mesh =
loadBlueprintHexMesh<seq_exec>(params.mesh_file_first, params.isVerbose());
break;
}

// Load Blueprint mesh for querying spatial index
SLIC_INFO(axom::fmt::format("Reading Blueprint file to query: '{}'...\n",
params.mesh_file_second));

HexMesh query_mesh =
loadBlueprintHexMesh(params.mesh_file_second, params.isVerbose());
HexMesh query_mesh;

switch(params.policy)
{
#ifdef AXOM_RUNTIME_POLICY_USE_OPENMP
case RuntimePolicy::omp:
query_mesh = loadBlueprintHexMesh<omp_exec>(params.mesh_file_second,
params.isVerbose());
break;
#endif
#ifdef AXOM_RUNTIME_POLICY_USE_CUDA
case RuntimePolicy::cuda:
query_mesh = loadBlueprintHexMesh<cuda_exec>(params.mesh_file_second,
params.isVerbose());
break;
#endif
#ifdef AXOM_RUNTIME_POLICY_USE_HIP
case RuntimePolicy::hip:
query_mesh = loadBlueprintHexMesh<hip_exec>(params.mesh_file_second,
params.isVerbose());
break;
#endif
default: // RuntimePolicy::seq
query_mesh = loadBlueprintHexMesh<seq_exec>(params.mesh_file_second,
params.isVerbose());
break;
}

timer.stop();

Expand Down

0 comments on commit 5743116

Please sign in to comment.