Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/unifiedcache_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ jobs:


- name: Install unified-cache-management
run: pip install -v -e .
run: pip install -v -e . --no-build-isolation

- name: Run ut
run: python3 -m unittest discover -s test
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ COPY . /vllm-workspace/unified-cache-management
RUN pip config set global.index-url ${PIP_INDEX_URL}

RUN export PLATFORM="cuda" && \
pip install -v -e /vllm-workspace/unified-cache-management
pip install -v -e /vllm-workspace/unified-cache-management --no-build-isolation

# Apply patch for vLLM
RUN cd $(pip show vllm | grep Location | awk '{print $2}') \
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile-NPU
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}

RUN export PLATFORM="ascend" && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
pip install -v -e /vllm-workspace/unified-cache-management
pip install -v -e /vllm-workspace/unified-cache-management --no-build-isolation

# Apply patch for vLLM
RUN cd /vllm-workspace/vllm \
Expand Down
2 changes: 1 addition & 1 deletion docs/source/getting-started/installation/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Follow commands below to install unified-cache-management:
git clone --depth 1 --branch <branch_or_tag_name> https://github.com/ModelEngine-Group/unified-cache-management.git
cd unified-cache-management
export PLATFORM=cuda
pip install -v -e .
pip install -v -e . --no-build-isolation
cd ..
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Follow commands below to install unified-cache-management:
git clone --depth 1 --branch <branch_or_tag_name> https://github.com/ModelEngine-Group/unified-cache-management.git
cd unified-cache-management
export PLATFORM=ascend
pip install -v -e .
pip install -v -e . --no-build-isolation
cd ..
```

Expand Down
3 changes: 2 additions & 1 deletion examples/offline_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def build_llm_with_uc(module_path: str, name: str, model: str):
"max_cache_size": 5368709120,
"kv_block_size": 262144,
},
"ucm_sparse_method": "ESA",
"ucm_sparse_method": "GSA",
},
)

Expand All @@ -39,6 +39,7 @@ def build_llm_with_uc(module_path: str, name: str, model: str):
kv_transfer_config=ktc,
max_model_len=8000,
gpu_memory_utilization=0.8,
block_size=128,
)

llm = LLM(**asdict(llm_args))
Expand Down
122 changes: 81 additions & 41 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,22 @@
import os
import shutil
import subprocess
import sys

from setuptools import Extension, find_packages, setup
from setuptools.command.build_ext import build_ext
from setuptools.command.develop import develop

ROOT_DIR = os.path.abspath(os.path.dirname(__file__))
SRC_DIR = os.path.join(ROOT_DIR, "ucm", "csrc", "ucmnfsstore")
INSTALL_DIR = os.path.join(ROOT_DIR, "ucm", "store")
FSSTORE_SRC_DIR = os.path.join(ROOT_DIR, "ucm", "csrc", "ucmnfsstore")
GSA_SRC_DIR = os.path.join(ROOT_DIR, "ucm", "csrc", "gsaoffloadops")
PREFETCH_SRC_DIR = os.path.join(ROOT_DIR, "ucm", "csrc", "ucmprefetch")

FSSTORE_INSTALL_DIR = os.path.join(ROOT_DIR, "ucm", "store")
GSA_INSTALL_DIR = os.path.join(ROOT_DIR, "ucm", "ucm_sparse")

PLATFORM = os.getenv("PLATFORM")
RUNTIME_ENVIRONMENT = os.getenv("RUNTIME_ENVIRONMENT")


def _is_cuda() -> bool:
Expand All @@ -56,62 +63,95 @@ def run(self):
self.build_cmake(ext)

def build_cmake(self, ext: CMakeExtension):
build_dir = os.path.abspath(self.build_temp)
build_dir = os.path.abspath(os.path.join(self.build_temp, ext.name))
os.makedirs(build_dir, exist_ok=True)

cmake_args = [
"cmake",
f"-DCMAKE_BUILD_TYPE=Release",
f"-DPYTHON_EXECUTABLE={sys.executable}",
]

cmake_args.append("-DDOWNLOAD_DEPENDENCE=ON")
if _is_cuda():
subprocess.check_call(
[
"cmake",
"-DDOWNLOAD_DEPENDENCE=ON",
"-DRUNTIME_ENVIRONMENT=cuda",
ext.sourcedir,
],
cwd=build_dir,
)
cmake_args.append("-DRUNTIME_ENVIRONMENT=cuda")
elif _is_npu():
subprocess.check_call(
[
"cmake",
"-DDOWNLOAD_DEPENDENCE=ON",
"-DRUNTIME_ENVIRONMENT=ascend",
ext.sourcedir,
],
cwd=build_dir,
)
cmake_args.append("-DRUNTIME_ENVIRONMENT=ascend")
else:
raise RuntimeError(
"No supported accelerator found. "
"Please ensure either CUDA or NPU is available."
)

subprocess.check_call(["make", "-j", "8"], cwd=build_dir)
cmake_args.append(ext.sourcedir)

so_file = None
print(f"[INFO] Building {ext.name} module with CMake")
print(f"[INFO] Source directory: {ext.sourcedir}")
print(f"[INFO] Build directory: {build_dir}")

subprocess.check_call(cmake_args, cwd=build_dir)

if ext.name in ["nfsstore", "gsa_offload_ops"]:
subprocess.check_call(["make", "-j", "8"], cwd=build_dir)
else:
# 对于gsa_prefetch使用cmake --build
subprocess.check_call(
["cmake", "--build", ".", "--config", "Release", "--", "-j8"],
cwd=build_dir,
)

self._copy_so_files(ext)

def _copy_so_files(self, ext: CMakeExtension):
"""复制编译好的.so文件"""
so_search_dir = os.path.join(ext.sourcedir, "output", "lib")
if not os.path.exists(so_search_dir):
raise FileNotFoundError(f"{so_search_dir} does not exist!")

so_file = None
so_files = []
search_patterns = [ext.name]

if ext.name == "nfsstore":
search_patterns.extend(["ucmnfsstore"])
elif ext.name == "gsa_offload_ops":
search_patterns.extend(["gsa_offload_ops"])
elif ext.name == "gsa_prefetch":
search_patterns.extend(["prefetch"])

for file in os.listdir(so_search_dir):
if file.startswith("ucmnfsstore") and file.endswith(".so"):
so_file = file
break
if file.endswith(".so") or ".so." in file:
for pattern in search_patterns:
if pattern in file:
so_files.append(file)
break

if ext.name == "nfsstore":
install_dir = FSSTORE_INSTALL_DIR
build_install_dir = "ucm/store"
else:
install_dir = GSA_INSTALL_DIR
build_install_dir = "ucm_sparse"

for so_file in so_files:
src_path = os.path.join(so_search_dir, so_file)
dev_path = os.path.join(install_dir, so_file)
dst_path = os.path.join(self.build_lib, build_install_dir, so_file)

os.makedirs(os.path.dirname(dst_path), exist_ok=True)
shutil.copy(src_path, dst_path)
print(f"[INFO] Copied {so_file} → {dst_path}")

if isinstance(self.distribution.get_command_obj("develop"), develop):
os.makedirs(os.path.dirname(dev_path), exist_ok=True)
shutil.copy(src_path, dev_path)
print(f"[INFO] Copied in editable mode {so_file} → {dev_path}")

if not so_file:
raise FileNotFoundError(
"Compiled .so file not found in output/lib directory."
)

src_path = os.path.join(so_search_dir, so_file)
dev_path = os.path.join(INSTALL_DIR, so_file)
dst_path = os.path.join(self.build_lib, "ucm", "store", so_file)
os.makedirs(os.path.dirname(dst_path), exist_ok=True)
shutil.copy(src_path, dst_path)
print(f"[INFO] Copied {src_path} → {dst_path}")
if isinstance(self.distribution.get_command_obj("develop"), develop):
shutil.copy(src_path, dev_path)
print(f"[INFO] Copied in editable mode {src_path} → {dev_path}")
ext_modules = []

ext_modules.append(CMakeExtension(name="nfsstore", sourcedir=FSSTORE_SRC_DIR))
ext_modules.append(CMakeExtension(name="gsa_offload_ops", sourcedir=GSA_SRC_DIR))
ext_modules.append(CMakeExtension(name="gsa_prefetch", sourcedir=PREFETCH_SRC_DIR))

setup(
name="ucm",
Expand All @@ -120,7 +160,7 @@ def build_cmake(self, ext: CMakeExtension):
author="Unified Cache Team",
packages=find_packages(),
python_requires=">=3.10",
ext_modules=[CMakeExtension(name="ucmnfsstore", sourcedir=SRC_DIR)],
ext_modules=ext_modules,
cmdclass={"build_ext": CMakeBuild},
zip_safe=False,
)
141 changes: 141 additions & 0 deletions ucm/csrc/gsaoffloadops/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
cmake_minimum_required(VERSION 3.16)
project(gsa_offload_ops)

# Build as C++17 and fail if the compiler cannot provide it.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Select the target device. Precedence:
#   1. -DRUNTIME_ENVIRONMENT=<value> given on the cmake command line,
#   2. the RUNTIME_ENVIRONMENT environment variable,
#   3. "cuda" as the default.
# An explicit -D must win: otherwise a stale environment variable would
# silently override what the invoking build script asked for.
if(NOT RUNTIME_ENVIRONMENT)
  if(DEFINED ENV{RUNTIME_ENVIRONMENT} AND NOT "$ENV{RUNTIME_ENVIRONMENT}" STREQUAL "")
    set(RUNTIME_ENVIRONMENT "$ENV{RUNTIME_ENVIRONMENT}")
  else()
    set(RUNTIME_ENVIRONMENT "cuda")
  endif()
endif()

message(STATUS "[INFO] Building gsa_offload_ops for device: ${RUNTIME_ENVIRONMENT}")

# Locate the Python interpreter and development files.
# FindPython only honours Python_EXECUTABLE as a hint; accept the legacy
# PYTHON_EXECUTABLE spelling too so that the interpreter chosen by the caller
# is the one whose torch / pybind11 installations get queried below.
if(DEFINED PYTHON_EXECUTABLE AND NOT DEFINED Python_EXECUTABLE)
  set(Python_EXECUTABLE "${PYTHON_EXECUTABLE}")
endif()
find_package(Python COMPONENTS Interpreter Development REQUIRED)

# Ask the interpreter where the torch package directory lives.
execute_process(
  COMMAND "${Python_EXECUTABLE}" -c "import torch; import os; print(os.path.dirname(os.path.abspath(torch.__file__)))"
  OUTPUT_VARIABLE PYTORCH_PATH
  OUTPUT_STRIP_TRAILING_WHITESPACE
  RESULT_VARIABLE PYTORCH_RESULT
)

if(NOT PYTORCH_RESULT EQUAL 0)
  message(FATAL_ERROR "Failed to find PyTorch installation (interpreter: ${Python_EXECUTABLE})")
endif()

# Ask the interpreter where pybind11 keeps its CMake package files, then load
# that package through find_package().
execute_process(
  COMMAND "${Python_EXECUTABLE}" -c "import pybind11; print(pybind11.get_cmake_dir())"
  OUTPUT_VARIABLE pybind11_DIR
  OUTPUT_STRIP_TRAILING_WHITESPACE
  RESULT_VARIABLE PYBIND11_RESULT
)

if(NOT PYBIND11_RESULT EQUAL 0)
  message(FATAL_ERROR "Failed to find pybind11 (interpreter: ${Python_EXECUTABLE})")
endif()

find_package(pybind11 CONFIG REQUIRED)

# Baseline compiler flags.
# -march=native ties the binary to the CPU of the build host, so it can be
# switched off for portable builds; it defaults to ON, which keeps the flags
# identical to the previous unconditional setting.
option(GSA_OFFLOAD_OPS_NATIVE_ARCH "Compile gsa_offload_ops with -march=native" ON)
string(APPEND CMAKE_CXX_FLAGS " -O3 -fopenmp")
if(GSA_OFFLOAD_OPS_NATIVE_ARCH)
  string(APPEND CMAKE_CXX_FLAGS " -march=native")
endif()

# Value for _GLIBCXX_USE_CXX11_ABI; replaced by "0" below when torch_npu is used.
# NOTE(review): presumably this has to match the ABI the installed torch was
# built with (torch.compiled_with_cxx11_abi()) - confirm for the CUDA wheels.
set(CXX11_ABI "1")

# Header search paths, library search paths and libraries shared by every device.
set(INCLUDE_DIRS
  ${PYTORCH_PATH}/include/torch/csrc/api/include
  ${PYTORCH_PATH}/include
  ${CMAKE_CURRENT_SOURCE_DIR}/include
)

set(LIBRARY_DIRS
  ${PYTORCH_PATH}/lib
  /usr/local/lib
)

set(LIBRARIES
  torch
  c10
  torch_cpu
  torch_python
  gomp
  pthread
)

# Extra configuration for Ascend NPU builds.
if("${RUNTIME_ENVIRONMENT}" STREQUAL "ascend")
  message(STATUS "Configuring for NPU/Ascend device")

  # Ask the interpreter where the torch_npu package directory lives.
  execute_process(
    COMMAND "${Python_EXECUTABLE}" -c "import torch_npu; import os; print(os.path.dirname(os.path.abspath(torch_npu.__file__)))"
    OUTPUT_VARIABLE PYTORCH_NPU_PATH
    OUTPUT_STRIP_TRAILING_WHITESPACE
    RESULT_VARIABLE NPU_RESULT
  )

  if(NPU_RESULT EQUAL 0)
    message(STATUS "Found torch_npu at: ${PYTORCH_NPU_PATH}")
    # torch_npu entries go first so they are searched before the stock torch ones.
    list(PREPEND INCLUDE_DIRS ${PYTORCH_NPU_PATH}/include)
    list(PREPEND LIBRARY_DIRS ${PYTORCH_NPU_PATH}/lib)
    list(PREPEND LIBRARIES torch_npu)
    set(CXX11_ABI "0")
  else()
    # Best effort: keep configuring with the generic settings.
    message(WARNING "torch_npu not found, but RUNTIME_ENVIRONMENT is set to ascend")
  endif()
endif()

# Pass the selected libstdc++ ABI to the compiler.
string(APPEND CMAKE_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=${CXX11_ABI}")

# OpenMP is mandatory; configuration stops here if it is unavailable.
find_package(OpenMP REQUIRED)

# Python extension module built through pybind11.
pybind11_add_module(gsa_offload_ops
  src/thread_safe_queue.cpp
  src/vec_product.cpp
  src/k_repre.cpp
  src/select_topk_block.cpp
  src/cal_kpre_and_topk.cpp
  src/pybinds.cpp
)

# Header search paths collected earlier in this file.
target_include_directories(gsa_offload_ops PRIVATE ${INCLUDE_DIRS})

# Library search paths collected earlier in this file.
target_link_directories(gsa_offload_ops PRIVATE ${LIBRARY_DIRS})

# Torch (and, when configured, torch_npu) libraries plus OpenMP. The OpenMP
# target always exists at this point because find_package() above is REQUIRED.
target_link_libraries(gsa_offload_ops PRIVATE ${LIBRARIES})
target_link_libraries(gsa_offload_ops PRIVATE OpenMP::OpenMP_CXX)

# The built module is placed in <source dir>/output/lib; create that directory
# at configure time so it exists before the first build.
set(OUTPUT_LIB_DIR "${CMAKE_CURRENT_SOURCE_DIR}/output/lib")
file(MAKE_DIRECTORY "${OUTPUT_LIB_DIR}")

set_target_properties(gsa_offload_ops PROPERTIES
  LIBRARY_OUTPUT_DIRECTORY "${OUTPUT_LIB_DIR}"
  RUNTIME_OUTPUT_DIRECTORY "${OUTPUT_LIB_DIR}"
)

# Print a short summary after each successful build of the module.
# VERBATIM keeps the messages (spaces, '=', paths) intact on every platform.
add_custom_command(TARGET gsa_offload_ops POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E echo "Built gsa_offload_ops successfully for ${RUNTIME_ENVIRONMENT}"
  COMMAND ${CMAKE_COMMAND} -E echo "CXX11_ABI=${CXX11_ABI}"
  COMMAND ${CMAKE_COMMAND} -E echo "Output location: ${OUTPUT_LIB_DIR}"
  VERBATIM
)
Loading