From 831b822b945b85eed66f4295e99bf0cc43dee437 Mon Sep 17 00:00:00 2001 From: Amrita H S Date: Thu, 4 Jun 2026 01:49:08 -0500 Subject: [PATCH] Fix incorrect inline assembly constraints in dcbt prefetch instructions Corrected the register constraints for the PowerPC dcbt (Data Cache Block Touch) instruction in Power10 kernel implementations. The dcbt instruction has special behavior where if the first operand (RA) is r0, it uses the value 0 instead of the register contents. Therefore, RA must use the "b" constraint (any GPR except r0), while RB can use "r" (any GPR including r0). Changes: - Changed first operand constraint from "r" to "b" to exclude r0 - Changed second operand constraint from "b" to "r" for flexibility This ensures correct prefetch behavior and compliance with PowerPC ISA specifications, preventing potential issues where r0 might be incorrectly used as the base address register. Signed-off-by: Amrita H S --- kernel/power/cgemm_kernel_power10.c | 2 +- kernel/power/dgemm_kernel_power10.c | 2 +- kernel/power/dgemm_ncopy_8_power10.c | 2 +- kernel/power/sbgemm_kernel_power10.c | 2 +- kernel/power/sgemm_kernel_power10.c | 2 +- kernel/power/zgemm_kernel_power10.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/power/cgemm_kernel_power10.c b/kernel/power/cgemm_kernel_power10.c index 279c83aec0..6ec40c2e71 100644 --- a/kernel/power/cgemm_kernel_power10.c +++ b/kernel/power/cgemm_kernel_power10.c @@ -452,7 +452,7 @@ typedef FLOAT v2sf_t __attribute__ ((vector_size (8))); CO[6*ldc+0] A_OP tr[3] * alpha_r - ti[3] * alpha_i; \ CO[6*ldc+1] A_OP ti[3] * alpha_r + tr[3] * alpha_i; -#define PREFETCH1(x, y) asm volatile ("dcbt %0, %1" : : "r" (x), "b" (y) : "memory"); +#define PREFETCH1(x, y) asm volatile ("dcbt %0, %1" : : "b" (x), "r" (y) : "memory"); #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) #define REFRESH_TEMP_BK(x, y) \ diff --git a/kernel/power/dgemm_kernel_power10.c 
b/kernel/power/dgemm_kernel_power10.c index f5cc3dfede..c5a2d8cb9c 100644 --- a/kernel/power/dgemm_kernel_power10.c +++ b/kernel/power/dgemm_kernel_power10.c @@ -104,7 +104,7 @@ typedef FLOAT v4sf_t __attribute__ ((vector_size (16))); __builtin_mma_xvf64gerpp(&acc5, rowB1, rowA[2]);\ __builtin_mma_xvf64gerpp(&acc6, rowB, rowA[3]);\ __builtin_mma_xvf64gerpp(&acc7, rowB1, rowA[3]); -#define PREFETCH1(x, y) asm volatile ("dcbt %0, %1" : : "r" (x), "b" (y) : "memory"); +#define PREFETCH1(x, y) asm volatile ("dcbt %0, %1" : : "b" (x), "r" (y) : "memory"); #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) #define REFRESH_TEMP_BK(x, y) \ diff --git a/kernel/power/dgemm_ncopy_8_power10.c b/kernel/power/dgemm_ncopy_8_power10.c index 9836c2e7f9..ec774c795b 100644 --- a/kernel/power/dgemm_ncopy_8_power10.c +++ b/kernel/power/dgemm_ncopy_8_power10.c @@ -39,7 +39,7 @@ #include <stdio.h> #include "common.h" #include <altivec.h> -#define PREFETCHA(x, y) asm volatile ("dcbt %0, %1" : : "r" (x), "b" (y) : "memory"); +#define PREFETCHA(x, y) asm volatile ("dcbt %0, %1" : : "b" (x), "r" (y) : "memory"); int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){ BLASLONG i, j; diff --git a/kernel/power/sbgemm_kernel_power10.c b/kernel/power/sbgemm_kernel_power10.c index c3fa67cf68..33d744abdd 100644 --- a/kernel/power/sbgemm_kernel_power10.c +++ b/kernel/power/sbgemm_kernel_power10.c @@ -147,7 +147,7 @@ typedef FLOAT v2sf_t __attribute__ ((vector_size (8))); __builtin_mma_xxsetaccz (&acc6); \ __builtin_mma_xxsetaccz (&acc7); -#define PREFETCH1(x, y) asm volatile ("dcbt %0, %1" : : "r" (x), "b" (y) : "memory"); +#define PREFETCH1(x, y) asm volatile ("dcbt %0, %1" : : "b" (x), "r" (y) : "memory"); /************************************************************************************* * SBGEMM Kernel *************************************************************************************/ diff --git a/kernel/power/sgemm_kernel_power10.c 
b/kernel/power/sgemm_kernel_power10.c index 1d86b57fcc..4bd6866cca 100644 --- a/kernel/power/sgemm_kernel_power10.c +++ b/kernel/power/sgemm_kernel_power10.c @@ -135,7 +135,7 @@ typedef FLOAT v2sf_t __attribute__ ((vector_size (8))); __builtin_mma_xvf32gerpp (&acc6, rowB[i], rowA[j+3]); \ __builtin_mma_xvf32gerpp (&acc7, rowB[i+1], rowA[j+3]); -#define PREFETCH1(x, y) asm volatile ("dcbt %0, %1" : : "r" (x), "b" (y) : "memory"); +#define PREFETCH1(x, y) asm volatile ("dcbt %0, %1" : : "b" (x), "r" (y) : "memory"); #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) #define REFRESH_TEMP_BK(x, y) \ diff --git a/kernel/power/zgemm_kernel_power10.c b/kernel/power/zgemm_kernel_power10.c index e4e609067c..4f7476db86 100644 --- a/kernel/power/zgemm_kernel_power10.c +++ b/kernel/power/zgemm_kernel_power10.c @@ -221,7 +221,7 @@ typedef FLOAT v4sf_t __attribute__ ((vector_size (16))); CO[2*ldc+CI+2] A_OP tr[3] * alpha_r - ti[3] * alpha_i; \ CO[2*ldc+CI+3] A_OP ti[3] * alpha_r + tr[3] * alpha_i; -#define PREFETCH1(x, y) asm volatile ("dcbt %0, %1" : : "r" (x), "b" (y) : "memory"); +#define PREFETCH1(x, y) asm volatile ("dcbt %0, %1" : : "b" (x), "r" (y) : "memory"); #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) #define REFRESH_TEMP_BK(x, y) \