ModelEngine-Group · qyh111 · Sep 18, 2025 · Sep 16, 2025 · Sep 16, 2025 · Sep 11, 2025
@@ -64,7 +64,7 @@ jobs:
 
 
       - name: Install unified-cache-management
-        run: pip install -v -e .
+        run: pip install -v -e . --no-build-isolation
 
       - name: Run ut
         run: python3 -m unittest discover -s test

@@ -11,7 +11,7 @@ COPY . /vllm-workspace/unified-cache-management
 RUN pip config set global.index-url ${PIP_INDEX_URL}
 
 RUN export PLATFORM="cuda" && \
-     pip install -v -e /vllm-workspace/unified-cache-management
+     pip install -v -e /vllm-workspace/unified-cache-management --no-build-isolation
 
 # Apply patch for vLLM
 RUN cd $(pip show vllm | grep Location | awk '{print $2}') \

@@ -12,7 +12,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 
 RUN export PLATFORM="ascend" && \
     export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
-    pip install -v -e /vllm-workspace/unified-cache-management
+    pip install -v -e /vllm-workspace/unified-cache-management --no-build-isolation
 
 # Apply patch for vLLM
 RUN cd /vllm-workspace/vllm \

@@ -45,7 +45,7 @@ Follow commands below to install unified-cache-management:
 git clone --depth 1 --branch <branch_or_tag_name> https://github.com/ModelEngine-Group/unified-cache-management.git
 cd unified-cache-management
 export PLATFORM=cuda
-pip install -v -e .
+pip install -v -e . --no-build-isolation
 cd ..
 ```
 

@@ -55,7 +55,7 @@ Follow commands below to install unified-cache-management:
 git clone --depth 1 --branch <branch_or_tag_name> https://github.com/ModelEngine-Group/unified-cache-management.git
 cd unified-cache-management
 export PLATFORM=ascend
-pip install -v -e .
+pip install -v -e . --no-build-isolation
 cd ..
 ```
 

@@ -30,7 +30,7 @@ def build_llm_with_uc(module_path: str, name: str, model: str):
                 "max_cache_size": 5368709120,
                 "kv_block_size": 262144,
             },
-            "ucm_sparse_method": "ESA",
+            "ucm_sparse_method": "GSA",
         },
     )
 
@@ -39,6 +39,7 @@ def build_llm_with_uc(module_path: str, name: str, model: str):
         kv_transfer_config=ktc,
         max_model_len=8000,
         gpu_memory_utilization=0.8,
+        block_size=128,
     )
 
     llm = LLM(**asdict(llm_args))

@@ -25,15 +25,22 @@
 import os
 import shutil
 import subprocess
+import sys
 
 from setuptools import Extension, find_packages, setup
 from setuptools.command.build_ext import build_ext
 from setuptools.command.develop import develop
 
 ROOT_DIR = os.path.abspath(os.path.dirname(__file__))
-SRC_DIR = os.path.join(ROOT_DIR, "ucm", "csrc", "ucmnfsstore")
-INSTALL_DIR = os.path.join(ROOT_DIR, "ucm", "store")
+FSSTORE_SRC_DIR = os.path.join(ROOT_DIR, "ucm", "csrc", "ucmnfsstore")
+GSA_SRC_DIR = os.path.join(ROOT_DIR, "ucm", "csrc", "gsaoffloadops")
+PREFETCH_SRC_DIR = os.path.join(ROOT_DIR, "ucm", "csrc", "ucmprefetch")
+
+FSSTORE_INSTALL_DIR = os.path.join(ROOT_DIR, "ucm", "store")
+GSA_INSTALL_DIR = os.path.join(ROOT_DIR, "ucm", "ucm_sparse")
+
 PLATFORM = os.getenv("PLATFORM")
+RUNTIME_ENVIRONMENT = os.getenv("RUNTIME_ENVIRONMENT")
 
 
 def _is_cuda() -> bool:
@@ -56,62 +63,95 @@ def run(self):
             self.build_cmake(ext)
 
     def build_cmake(self, ext: CMakeExtension):
-        build_dir = os.path.abspath(self.build_temp)
+        build_dir = os.path.abspath(os.path.join(self.build_temp, ext.name))
         os.makedirs(build_dir, exist_ok=True)
+
+        cmake_args = [
+            "cmake",
+            f"-DCMAKE_BUILD_TYPE=Release",
+            f"-DPYTHON_EXECUTABLE={sys.executable}",
+        ]
+
+        cmake_args.append("-DDOWNLOAD_DEPENDENCE=ON")
         if _is_cuda():
-            subprocess.check_call(
-                [
-                    "cmake",
-                    "-DDOWNLOAD_DEPENDENCE=ON",
-                    "-DRUNTIME_ENVIRONMENT=cuda",
-                    ext.sourcedir,
-                ],
-                cwd=build_dir,
-            )
+            cmake_args.append("-DRUNTIME_ENVIRONMENT=cuda")
         elif _is_npu():
-            subprocess.check_call(
-                [
-                    "cmake",
-                    "-DDOWNLOAD_DEPENDENCE=ON",
-                    "-DRUNTIME_ENVIRONMENT=ascend",
-                    ext.sourcedir,
-                ],
-                cwd=build_dir,
-            )
+            cmake_args.append("-DRUNTIME_ENVIRONMENT=ascend")
         else:
             raise RuntimeError(
                 "No supported accelerator found. "
                 "Please ensure either CUDA or NPU is available."
             )
 
-        subprocess.check_call(["make", "-j", "8"], cwd=build_dir)
+        cmake_args.append(ext.sourcedir)
 
-        so_file = None
+        print(f"[INFO] Building {ext.name} module with CMake")
+        print(f"[INFO] Source directory: {ext.sourcedir}")
+        print(f"[INFO] Build directory: {build_dir}")
+
+        subprocess.check_call(cmake_args, cwd=build_dir)
+
+        if ext.name in ["nfsstore", "gsa_offload_ops"]:
+            subprocess.check_call(["make", "-j", "8"], cwd=build_dir)
+        else:
+            # 对于gsa_prefetch使用cmake --build
+            subprocess.check_call(
+                ["cmake", "--build", ".", "--config", "Release", "--", "-j8"],
+                cwd=build_dir,
+            )
+
+        self._copy_so_files(ext)
+
+    def _copy_so_files(self, ext: CMakeExtension):
+        """复制编译好的.so文件"""
         so_search_dir = os.path.join(ext.sourcedir, "output", "lib")
         if not os.path.exists(so_search_dir):
             raise FileNotFoundError(f"{so_search_dir} does not exist!")
 
-        so_file = None
+        so_files = []
+        search_patterns = [ext.name]
+
+        if ext.name == "nfsstore":
+            search_patterns.extend(["ucmnfsstore"])
+        elif ext.name == "gsa_offload_ops":
+            search_patterns.extend(["gsa_offload_ops"])
+        elif ext.name == "gsa_prefetch":
+            search_patterns.extend(["prefetch"])
+
         for file in os.listdir(so_search_dir):
-            if file.startswith("ucmnfsstore") and file.endswith(".so"):
-                so_file = file
-                break
+            if file.endswith(".so") or ".so." in file:
+                for pattern in search_patterns:
+                    if pattern in file:
+                        so_files.append(file)
+                        break
+
+        if ext.name == "nfsstore":
+            install_dir = FSSTORE_INSTALL_DIR
+            build_install_dir = "ucm/store"
+        else:
+            install_dir = GSA_INSTALL_DIR
+            build_install_dir = "ucm_sparse"
+
+        for so_file in so_files:
+            src_path = os.path.join(so_search_dir, so_file)
+            dev_path = os.path.join(install_dir, so_file)
+            dst_path = os.path.join(self.build_lib, build_install_dir, so_file)
+
+            os.makedirs(os.path.dirname(dst_path), exist_ok=True)
+            shutil.copy(src_path, dst_path)
+            print(f"[INFO] Copied {so_file} → {dst_path}")
+
+            if isinstance(self.distribution.get_command_obj("develop"), develop):
+                os.makedirs(os.path.dirname(dev_path), exist_ok=True)
+                shutil.copy(src_path, dev_path)
+                print(f"[INFO] Copied in editable mode {so_file} → {dev_path}")
 
-        if not so_file:
-            raise FileNotFoundError(
-                "Compiled .so file not found in output/lib directory."
-            )
 
-        src_path = os.path.join(so_search_dir, so_file)
-        dev_path = os.path.join(INSTALL_DIR, so_file)
-        dst_path = os.path.join(self.build_lib, "ucm", "store", so_file)
-        os.makedirs(os.path.dirname(dst_path), exist_ok=True)
-        shutil.copy(src_path, dst_path)
-        print(f"[INFO] Copied {src_path} → {dst_path}")
-        if isinstance(self.distribution.get_command_obj("develop"), develop):
-            shutil.copy(src_path, dev_path)
-            print(f"[INFO] Copied in editable mode {src_path} → {dev_path}")
+ext_modules = []
 
+ext_modules.append(CMakeExtension(name="nfsstore", sourcedir=FSSTORE_SRC_DIR))
+ext_modules.append(CMakeExtension(name="gsa_offload_ops", sourcedir=GSA_SRC_DIR))
+ext_modules.append(CMakeExtension(name="gsa_prefetch", sourcedir=PREFETCH_SRC_DIR))
 
 setup(
     name="ucm",
@@ -120,7 +160,7 @@ def build_cmake(self, ext: CMakeExtension):
     author="Unified Cache Team",
     packages=find_packages(),
     python_requires=">=3.10",
-    ext_modules=[CMakeExtension(name="ucmnfsstore", sourcedir=SRC_DIR)],
+    ext_modules=ext_modules,
     cmdclass={"build_ext": CMakeBuild},
     zip_safe=False,
 )
@@ -0,0 +1,141 @@
+cmake_minimum_required(VERSION 3.16)
+project(gsa_offload_ops)
+
+# Require C++17
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Read the device type from the RUNTIME_ENVIRONMENT environment variable; fall back to cuda
+set(RUNTIME_ENVIRONMENT $ENV{RUNTIME_ENVIRONMENT})
+if(NOT RUNTIME_ENVIRONMENT)
+    set(RUNTIME_ENVIRONMENT "cuda")
+endif()
+
+message(STATUS "[INFO] Building gsa_offload_ops for device: ${RUNTIME_ENVIRONMENT}")
+
+# Locate the Python interpreter and development files
+find_package(Python COMPONENTS Interpreter Development REQUIRED)
+
+# Ask Python for the directory of the installed torch package
+execute_process(
+    COMMAND ${Python_EXECUTABLE} -c "import torch; import os; print(os.path.dirname(os.path.abspath(torch.__file__)))"
+    OUTPUT_VARIABLE PYTORCH_PATH
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    RESULT_VARIABLE PYTORCH_RESULT
+)
+
+if(NOT PYTORCH_RESULT EQUAL 0)
+    message(FATAL_ERROR "Failed to find PyTorch installation")
+endif()
+
+# Ask pybind11 for the directory that holds its CMake package files
+execute_process(
+    COMMAND ${Python_EXECUTABLE} -c "import pybind11; print(pybind11.get_cmake_dir())"
+    OUTPUT_VARIABLE pybind11_DIR
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    RESULT_VARIABLE PYBIND11_RESULT
+)
+
+if(NOT PYBIND11_RESULT EQUAL 0)
+    message(FATAL_ERROR "Failed to find pybind11")
+endif()
+
+find_package(pybind11 REQUIRED)
+
+# Base compile options. -march=native ties the binary to the CPU of the build
+# host, so it can be switched off with -DGSA_OFFLOAD_OPS_MARCH_NATIVE=OFF when
+# the library is built on one machine (CI, container image) and run on another.
+# The default (ON) keeps the previous flags unchanged.
+option(GSA_OFFLOAD_OPS_MARCH_NATIVE "Compile gsa_offload_ops with -march=native" ON)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fopenmp")
+if(GSA_OFFLOAD_OPS_MARCH_NATIVE)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
+endif()
+# Default value for _GLIBCXX_USE_CXX11_ABI; set to "0" further down when torch_npu is found.
+set(CXX11_ABI "1")
+
+# Default include directories, library directories and libraries; extended below for Ascend
+set(INCLUDE_DIRS
+    ${PYTORCH_PATH}/include/torch/csrc/api/include
+    ${PYTORCH_PATH}/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/include
+)
+
+set(LIBRARY_DIRS
+    ${PYTORCH_PATH}/lib
+    /usr/local/lib
+)
+
+set(LIBRARIES
+    torch
+    c10
+    torch_cpu
+    torch_python
+    gomp
+    pthread
+)
+
+# Extra configuration for NPU (Ascend)
+if(RUNTIME_ENVIRONMENT STREQUAL "ascend")
+    message(STATUS "Configuring for NPU/Ascend device")
+
+    # Ask Python for the directory of the installed torch_npu package
+    execute_process(
+        COMMAND ${Python_EXECUTABLE} -c "import torch_npu; import os; print(os.path.dirname(os.path.abspath(torch_npu.__file__)))"
+        OUTPUT_VARIABLE PYTORCH_NPU_PATH
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        RESULT_VARIABLE NPU_RESULT
+    )
+
+    if(NPU_RESULT EQUAL 0)
+        message(STATUS "Found torch_npu at: ${PYTORCH_NPU_PATH}")
+        list(INSERT INCLUDE_DIRS 0 ${PYTORCH_NPU_PATH}/include)
+        list(INSERT LIBRARY_DIRS 0 ${PYTORCH_NPU_PATH}/lib)
+        list(INSERT LIBRARIES 0 torch_npu)
+        set(CXX11_ABI "0")
+    else()
+        message(WARNING "torch_npu not found, but RUNTIME_ENVIRONMENT is set to ascend")
+    endif()
+endif()
+
+# Pass the chosen _GLIBCXX_USE_CXX11_ABI value to the compiler
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=${CXX11_ABI}")
+
+# Locate OpenMP (required)
+find_package(OpenMP REQUIRED)
+
+# Source files of the extension module
+set(SOURCES
+    src/thread_safe_queue.cpp
+    src/vec_product.cpp
+    src/k_repre.cpp
+    src/select_topk_block.cpp
+    src/cal_kpre_and_topk.cpp
+    src/pybinds.cpp
+)
+
+# Create the pybind11 module target
+pybind11_add_module(gsa_offload_ops ${SOURCES})
+
+# Header search paths
+target_include_directories(gsa_offload_ops PRIVATE ${INCLUDE_DIRS})
+
+# Library search paths
+target_link_directories(gsa_offload_ops PRIVATE ${LIBRARY_DIRS})
+
+# Link the libraries collected in LIBRARIES
+target_link_libraries(gsa_offload_ops PRIVATE ${LIBRARIES})
+
+# Link the OpenMP imported target
+if(OpenMP_CXX_FOUND)
+    target_link_libraries(gsa_offload_ops PRIVATE OpenMP::OpenMP_CXX)
+endif()
+
+# Make sure the output directory exists
+set(OUTPUT_LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/output/lib)
+file(MAKE_DIRECTORY ${OUTPUT_LIB_DIR})
+
+# Place the built module in the output directory
+set_target_properties(gsa_offload_ops PROPERTIES
+    LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_LIB_DIR}
+    RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_LIB_DIR}
+)
+
+# Print a summary after the build
+add_custom_command(TARGET gsa_offload_ops POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E echo "Built gsa_offload_ops successfully for ${RUNTIME_ENVIRONMENT}"
+    COMMAND ${CMAKE_COMMAND} -E echo "CXX11_ABI=${CXX11_ABI}"
+    COMMAND ${CMAKE_COMMAND} -E echo "Output location: ${OUTPUT_LIB_DIR}"
+)