Skip to content

Commit

Permalink
Improve guards for arch-specific instructions (#368)
Browse files Browse the repository at this point in the history
When choosing between a specialized implementation that uses
architecture-specific functionality and a generic fallback, it is
usually preferable to make the fallback the default. This will give the
software the best possible chance of functioning without modification
on future hardware.

Of course, the library will still need code updates to function
optimally on hardware released after the software was written.

rocSPARSE can also be compiled with CXXFLAGS=-DROCSPARSE_USE_MOVE_DPP=0
to force the use of the fallback implementation. Or with the value 1 to
force the use of the specialized __hip_move_dpp implementation.

This change fixes the compilation error:

    Illegal instruction detected: Invalid dpp_ctrl value: broadcasts are not supported on GFX10+

when building for unsupported Navi 1x and Navi 2x GPUs as was
reported in #250
  • Loading branch information
cgmb committed Jul 9, 2022
1 parent 8e5ffc1 commit f9446b8
Showing 1 changed file with 15 additions and 3 deletions.
18 changes: 15 additions & 3 deletions library/src/include/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@

// clang-format off

#ifndef ROCSPARSE_USE_MOVE_DPP
#if defined(__gfx803__) || \
defined(__gfx900__) || \
defined(__gfx906__) || \
defined(__gfx908__) || \
defined(__gfx90a__)
#define ROCSPARSE_USE_MOVE_DPP 1
#else
#define ROCSPARSE_USE_MOVE_DPP 0
#endif
#endif

// BSR indexing macros
#define BSR_IND(j, bi, bj, dir) ((dir == rocsparse_direction_row) ? BSR_IND_R(j, bi, bj) : BSR_IND_C(j, bi, bj))
#define BSR_IND_R(j, bi, bj) (block_dim * block_dim * (j) + (bi) * block_dim + (bj))
Expand Down Expand Up @@ -233,7 +245,7 @@ __device__ __forceinline__ void rocsparse_blockreduce_min(int i, T* data)
if(BLOCKSIZE > 1) { if(i < 1 && i + 1 < BLOCKSIZE) { data[i] = min(data[i], data[i + 1]); } __syncthreads(); }
}

#if (!defined(__gfx1030__)) && (!defined(__gfx1011__))
#if ROCSPARSE_USE_MOVE_DPP
// DPP-based wavefront reduction maximum
template <unsigned int WFSIZE>
__device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
Expand Down Expand Up @@ -499,7 +511,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)
sum = temp_sum.val;
return sum;
}
#else
#else /* ROCSPARSE_USE_MOVE_DPP */
template <unsigned int WFSIZE>
__device__ __forceinline__ void rocsparse_wfreduce_max(int* maximum)
{
Expand Down Expand Up @@ -566,7 +578,7 @@ __device__ __forceinline__ double rocsparse_wfreduce_sum(double sum)

return sum;
}
#endif
#endif /* ROCSPARSE_USE_MOVE_DPP */

// DPP-based complex float wavefront reduction sum
template <unsigned int WFSIZE>
Expand Down

0 comments on commit f9446b8

Please sign in to comment.