From 50e5db789aff77e275dd792eca9e568509c47936 Mon Sep 17 00:00:00 2001
From: ThomasNing
Date: Sat, 4 Oct 2025 20:53:51 +0000
Subject: [PATCH] add the sync barrier for persistent kernel

---
 include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp b/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp
index 51ad4e3dd19..e77355ed3dc 100644
--- a/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp
+++ b/include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp
@@ -1134,6 +1134,7 @@ struct UniversalGemmKernel
 
         while(block_id < num_work)
         {
+            s_waitcnt_barrier();
             // Get the tile index for this block
             const auto tile_idx = amd_wave_read_first_lane(block_id % num_tiles);
             const auto [iM, iN] = TilePartitioner{kargs.M, kargs.N}.GetOutputTileIndex(tile_idx);