From c0b2772288ad85f5ff59ffec65f62b1867113340 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 16 Oct 2025 13:53:14 +0200 Subject: [PATCH 1/5] move L2 HFLOAT16 kernels out of the BFLOAT16 block --- cmake/kernel.cmake | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cmake/kernel.cmake b/cmake/kernel.cmake index 26f94bc0b5..3b9e3849d9 100644 --- a/cmake/kernel.cmake +++ b/cmake/kernel.cmake @@ -230,10 +230,12 @@ macro(SetDefaultL2) if (BUILD_BFLOAT16) SetFallback(BGEMVNKERNEL ../generic/gemv_n.c) SetFallback(BGEMVTKERNEL ../generic/gemv_t.c) - SetFallback(SHGEMVNKERNEL ../generic/gemv_n.c) - SetFallback(SHGEMVTKERNEL ../generic/gemv_t.c) SetFallback(SBGEMVNKERNEL ../x86_64/sbgemv_n.c) SetFallback(SBGEMVTKERNEL ../x86_64/sbgemv_t.c) +endif () +if (BUILD_HFLOAT16) + SetFallback(SHGEMVNKERNEL ../generic/gemv_n.c) + SetFallback(SHGEMVTKERNEL ../generic/gemv_t.c) SetFallback(SHGERKERNEL ../generic/ger.c) endif () endmacro () From c92bac152484c27cdbbafdeac350cc16a66463ff Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 16 Oct 2025 04:57:18 -0700 Subject: [PATCH 2/5] Add SHGEMV --- driver/level2/CMakeLists.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index c52b461a7e..91bc3a56db 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -205,6 +205,13 @@ if (BUILD_BFLOAT16) endif () endif () +if (BUILD_HFLOAT16) + if (USE_THREAD) + GenerateNamedObjects("sbgemv_thread.c" "" "gemv_thread_n" false "" "" false "HFLOAT16") + GenerateNamedObjects("sbgemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "HFLOAT16") + endif () +endif () + if ( BUILD_COMPLEX AND NOT BUILD_SINGLE) if (USE_THREAD) GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false "SINGLE") From a387217a0784e6f5c05afd21791dace51f00f1dc Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 16 Oct 2025 05:02:24 -0700 Subject: [PATCH 3/5] Add BGEMV --- driver/level2/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index 91bc3a56db..03d438ac10 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -202,6 +202,8 @@ if (BUILD_BFLOAT16) if (USE_THREAD) GenerateNamedObjects("sbgemv_thread.c" "" "gemv_thread_n" false "" "" false "BFLOAT16") GenerateNamedObjects("sbgemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "BFLOAT16") + GenerateNamedObjects("sbgemv_thread.c" "BGEMM" "bgemv_thread_n" false "" "" false "") + GenerateNamedObjects("sbgemv_thread.c" "BGEMM;TRANSA" "bgemv_thread_t" false "" "" false "") endif () endif () From a9a152ebc7bf9e775b8e1a0dfcf7e4ffeb19e7d5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 16 Oct 2025 10:00:41 -0700 Subject: [PATCH 4/5] fix bgemv build --- driver/level2/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index 03d438ac10..a0360f7e1a 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -202,8 +202,8 @@ if (BUILD_BFLOAT16) if (USE_THREAD) GenerateNamedObjects("sbgemv_thread.c" "" "gemv_thread_n" false "" "" false "BFLOAT16") GenerateNamedObjects("sbgemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "BFLOAT16") - GenerateNamedObjects("sbgemv_thread.c" "BGEMM" "bgemv_thread_n" false "" "" false "") - GenerateNamedObjects("sbgemv_thread.c" "BGEMM;TRANSA" "bgemv_thread_t" false "" "" false "") + GenerateNamedObjects("sbgemv_thread.c" "BGEMM;BFLOAT16" "bgemv_thread_n" false "" "" true "") + GenerateNamedObjects("sbgemv_thread.c" "BGEMM;BFLOAT16;TRANSA" "bgemv_thread_t" false "" "" true "") endif () endif () From 5b640b1cbcd0394bad73ed5bb66ac74fad446189 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 16 Oct 2025 10:03:04 -0700 Subject: [PATCH 5/5] add bgemm_thread_xx --- driver/level3/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index be2ba23c24..468628af2e 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -17,6 +17,7 @@ foreach (GEMM_DEFINE ${GEMM_DEFINES}) GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};BGEMM" "gemm_${GEMM_DEFINE_LC}" 0 "" "" false "BFLOAT16") if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0 "" "" false "BFLOAT16") + GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3;BGEMM" "gemm_thread_${GEMM_DEFINE_LC}" 0 "" "" false "BFLOAT16") endif () endif () if (BUILD_HFLOAT16)