From f41286dc2b3b4ae691cd7a7214ad3d5d948e7d68 Mon Sep 17 00:00:00 2001
From: HaoLi980405
Date: Tue, 30 Sep 2025 15:01:28 +0800
Subject: [PATCH 1/3] [bugfix] fix gsa coredump

---
 ucm/sparse/gsa/gsa.py                                |  4 +---
 .../gsa/offload_ops/include/cal_kpre_and_topk.h      |  2 +-
 ucm/sparse/gsa/offload_ops/src/cal_kpre_and_topk.cpp | 11 +++--------
 3 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/ucm/sparse/gsa/gsa.py b/ucm/sparse/gsa/gsa.py
index cadb340b..cefd5b6b 100644
--- a/ucm/sparse/gsa/gsa.py
+++ b/ucm/sparse/gsa/gsa.py
@@ -497,9 +497,7 @@ def init_topk_cal(
         self.gsa_offload_ops = gsa_offload_ops.CalKpreAndTopk(
             self.layer_num, block_size, MAX_BS, att_num_heads, head_size
         )
-        self.gsa_offload_ops.set_kpre_method_param(
-            int(max_model_len / block_size) * MAX_BS, kv_num_heads, 1
-        )
+        self.gsa_offload_ops.set_kpre_method_param(kv_num_heads, 1)
         self.gsa_offload_ops.set_kpre_cache(prefetch_engine.kpre_caches)
         self.is_cal_kpre = [False] * self.layer_num
         self.gsa_q_cache = torch.zeros(
diff --git a/ucm/sparse/gsa/offload_ops/include/cal_kpre_and_topk.h b/ucm/sparse/gsa/offload_ops/include/cal_kpre_and_topk.h
index 9cbf7cae..3e52ca80 100644
--- a/ucm/sparse/gsa/offload_ops/include/cal_kpre_and_topk.h
+++ b/ucm/sparse/gsa/offload_ops/include/cal_kpre_and_topk.h
@@ -46,7 +46,7 @@ class __attribute__((visibility("hidden"))) CalKpreAndTopk
 public:
     CalKpreAndTopk(uint32_t layerNum, uint32_t blockSize, uint32_t maxBs, uint32_t numHeads, uint32_t headSize);
     ~CalKpreAndTopk();
-    void SetKpreMethodParam(uint32_t maxBlockNum, uint32_t numHeads, uint32_t numKpre);
+    void SetKpreMethodParam(uint32_t numHeads, uint32_t numKpre);
     void SetKpreCache(std::vector& kpreCache);
     void SetTopkCache(std::vector& topkCache, std::vector& topkLens);
     void SetCommonParam(std::vector& calTopkIdx, std::vector& isDecode);
diff --git a/ucm/sparse/gsa/offload_ops/src/cal_kpre_and_topk.cpp b/ucm/sparse/gsa/offload_ops/src/cal_kpre_and_topk.cpp
index 89a6b1d3..522a2881 100644
--- a/ucm/sparse/gsa/offload_ops/src/cal_kpre_and_topk.cpp
+++ b/ucm/sparse/gsa/offload_ops/src/cal_kpre_and_topk.cpp
@@ -27,15 +27,10 @@ CalKpreAndTopk::CalKpreAndTopk(uint32_t layerNum, uint32_t blockSize, uint32_t m
     m_count = 0;
 }
 
-void CalKpreAndTopk::SetKpreMethodParam(uint32_t maxBlockNum, uint32_t numHeads, uint32_t numKpre)
+void CalKpreAndTopk::SetKpreMethodParam(uint32_t numHeads, uint32_t numKpre)
 {
-    // m_kNumHeads = numHeads;
-    // m_numKpre = numKpre;
-    // auto optionsForKCache = torch::TensorOptions().device("cpu").dtype(torch::kFloat32);
-    // for (uint32_t i = 0; i < m_layerNum; i++) {
-    //     torch::Tensor layerKCache = torch::zeros({maxBlockNum, m_kNumHeads, m_blockSize, m_headSize}, optionsForKCache);
-    //     m_kCache.push_back(layerKCache);
-    // }
+    m_kNumHeads = numHeads;
+    m_numKpre = numKpre;
 }
 
 void CalKpreAndTopk::SetKpreCache(std::vector& kpreCache)

From 5d7df37322943f6de77e495a26321c66f1f298a8 Mon Sep 17 00:00:00 2001
From: HaoLi980405
Date: Tue, 30 Sep 2025 15:04:02 +0800
Subject: [PATCH 2/3] [bugfix] fix build_sparse_meta param lost

---
 ucm/sparse/gsa/gsa.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ucm/sparse/gsa/gsa.py b/ucm/sparse/gsa/gsa.py
index cefd5b6b..8e4e61ae 100644
--- a/ucm/sparse/gsa/gsa.py
+++ b/ucm/sparse/gsa/gsa.py
@@ -870,6 +870,7 @@ def build_sparse_meta(
         scheduler_output: SchedulerOutput,
         requests,
         input_batch,
+        attn_metadata
     ) -> None:
         self.gsa_metadata = self.build_gsa_metadata(
             scheduler_output, requests, input_batch

From 18133b87e9ff057ef106754c731c56ad3aa1054f Mon Sep 17 00:00:00 2001
From: HaoLi980405
Date: Tue, 30 Sep 2025 15:53:59 +0800
Subject: [PATCH 3/3] clean code

---
 ucm/sparse/gsa/gsa.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/ucm/sparse/gsa/gsa.py b/ucm/sparse/gsa/gsa.py
index 8e4e61ae..3d4f4978 100644
--- a/ucm/sparse/gsa/gsa.py
+++ b/ucm/sparse/gsa/gsa.py
@@ -866,11 +866,7 @@ def execute_finished(self):
         )
 
     def build_sparse_meta(
-        self,
-        scheduler_output: SchedulerOutput,
-        requests,
-        input_batch,
-        attn_metadata
+        self, scheduler_output: SchedulerOutput, requests, input_batch, attn_metadata
     ) -> None:
         self.gsa_metadata = self.build_gsa_metadata(
             scheduler_output, requests, input_batch