Skip to content

Commit

Permalink
CPU version of TagBox::buffer (#1957)
Browse files Browse the repository at this point in the history
Add a CPU version of TagBox::buffer.  @MSABuschman reported in #1951 that
TagBox::buffer has been very slow since commit #1258 when the error buffer
size is large.  The function was rewritten in #1258 to do the work on GPU.
This PR reintroduces the old version for CPU; it is used whenever the work
is not launched on GPU.

Note that the current GPU implementation is still expected to perform poorly
when the error buffer is very large.  It is not yet clear how this function
should be implemented for GPU when a large error buffer is used.
  • Loading branch information
WeiqunZhang committed Apr 19, 2021
1 parent 4320729 commit c409f9e
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ jobs:
- name: Build & Install
run: |
./configure --dim 2 --with-fortran no --comp llvm --with-mpi no
make -j2 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS="-fno-operator-names -Wno-error=c++17-extensions"
make -j2 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS="-fno-operator-names -Wno-c++17-extensions"
make install
# Build 3D libamrex with configure
Expand Down
5 changes: 4 additions & 1 deletion Src/AmrCore/AMReX_TagBox.H
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,13 @@ public:

/**
* \brief Mark neighbors of every tagged cell a distance nbuff away.
* Only the interior is searched for initially tagged points, where nwid
* is given as the width of the boundary region.
*
* \param nbuff
* \param nwid
*/
void buffer (const IntVect& nbuf) noexcept;
void buffer (const IntVect& nbuf, const IntVect& nwid) noexcept;

/**
* \brief Returns Vector\<int\> of size domain.numPts() suitable for calling
Expand Down
65 changes: 43 additions & 22 deletions Src/AmrCore/AMReX_TagBox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,30 +75,51 @@ TagBox::coarsen (const IntVect& ratio, const Box& cbox) noexcept
}

void
TagBox::buffer (const IntVect& a_nbuff) noexcept
TagBox::buffer (const IntVect& a_nbuff, const IntVect& a_nwid) noexcept
{
amrex::ignore_unused(a_nbuff, a_nwid);
Array4<char> const& a = this->array();
Dim3 nbuf = a_nbuff.dim3();
const auto lo = amrex::lbound(domain);
const auto hi = amrex::ubound(domain);
AMREX_HOST_DEVICE_FOR_3D(domain, i, j, k,
#ifdef AMREX_USE_GPU
if (Gpu::inLaunchRegion()) {
Dim3 nbuf = a_nbuff.dim3();
const auto lo = amrex::lbound(domain);
const auto hi = amrex::ubound(domain);
AMREX_HOST_DEVICE_FOR_3D(domain, i, j, k,
{
if (a(i,j,k) == TagBox::CLEAR) {
bool to_buf = false;
int imin = amrex::max(i-nbuf.x, lo.x);
int jmin = amrex::max(j-nbuf.y, lo.y);
int kmin = amrex::max(k-nbuf.z, lo.z);
int imax = amrex::min(i+nbuf.x, hi.x);
int jmax = amrex::min(j+nbuf.y, hi.y);
int kmax = amrex::min(k+nbuf.z, hi.z);
// xxxxx TODO: If nbuf is large, this is not efficient.
// We need to find another better way.
for (int kk = kmin; kk <= kmax && !to_buf; ++kk) {
for (int jj = jmin; jj <= jmax && !to_buf; ++jj) {
for (int ii = imin; ii <= imax && !to_buf; ++ii) {
if (a(ii,jj,kk) == TagBox::SET) to_buf = true;
}}}
if (to_buf) a(i,j,k) = TagBox::BUF;
}
});
} else
#endif
{
if (a(i,j,k) == TagBox::CLEAR) {
bool to_buf = false;
int imin = amrex::max(i-nbuf.x, lo.x);
int jmin = amrex::max(j-nbuf.y, lo.y);
int kmin = amrex::max(k-nbuf.z, lo.z);
int imax = amrex::min(i+nbuf.x, hi.x);
int jmax = amrex::min(j+nbuf.y, hi.y);
int kmax = amrex::min(k+nbuf.z, hi.z);
for (int kk = kmin; kk <= kmax && !to_buf; ++kk) {
for (int jj = jmin; jj <= jmax && !to_buf; ++jj) {
for (int ii = imin; ii <= imax && !to_buf; ++ii) {
if (a(ii,jj,kk) == TagBox::SET) to_buf = true;
}}}
if (to_buf) a(i,j,k) = TagBox::BUF;
}
});
Dim3 nwid = a_nwid.dim3();
Box const& interior = amrex::grow(domain, -a_nwid);
AMREX_LOOP_3D(interior, i, j, k,
{
if (a(i,j,k) == TagBox::SET) {
for (int kk = k-nwid.z; kk <= k+nwid.z; ++kk) {
for (int jj = j-nwid.y; jj <= j+nwid.y; ++jj) {
for (int ii = i-nwid.x; ii <= i+nwid.x; ++ii) {
if (a(ii,jj,kk) == TagBox::CLEAR) { a(ii,jj,kk) = TagBox::BUF; }
}}}
}
});
}
}

// DEPRECATED
Expand Down Expand Up @@ -295,7 +316,7 @@ TagBoxArray::buffer (const IntVect& nbuf)
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
for (MFIter mfi(*this); mfi.isValid(); ++mfi) {
get(mfi).buffer(nbuf);
get(mfi).buffer(nbuf, n_grow);
}
}
}
Expand Down

0 comments on commit c409f9e

Please sign in to comment.