From 0abbcd19c1589bac3e5d1eae5d87a40535b26510 Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Mon, 13 Sep 2021 01:44:53 -0700 Subject: [PATCH] sbgemm: spr: tuning for blocking params --- param.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/param.h b/param.h index 4e83714d18..c2c6916bc7 100644 --- a/param.h +++ b/param.h @@ -1771,6 +1771,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #define USE_SGEMM_KERNEL_DIRECT 1 +#undef SBGEMM_DEFAULT_UNROLL_N +#undef SBGEMM_DEFAULT_UNROLL_M +#undef SBGEMM_DEFAULT_P +#undef SBGEMM_DEFAULT_R +#undef SBGEMM_DEFAULT_Q +// FIXME: the actual values are UNROLL_M = UNROLL_N = 16. +// If UNROLL_M and UNROLL_N are equal, OpenBLAS will reuse OCOPY as ICOPY. +// But for AMX the two copy routines are not the same, so set UNROLL_M = 32 as a workaround. +#define SBGEMM_DEFAULT_UNROLL_N 16 +#define SBGEMM_DEFAULT_UNROLL_M 32 +#define SBGEMM_DEFAULT_P 192 +#define SBGEMM_DEFAULT_Q 1024 +#define SBGEMM_DEFAULT_R sbgemm_r + #ifdef ARCH_X86 #define SGEMM_DEFAULT_UNROLL_M 4