From e49307ef2070b7b3b55afa8681c7ff53916df00f Mon Sep 17 00:00:00 2001 From: NaganooMei <290992347@qq.com> Date: Fri, 5 Sep 2025 15:22:56 +0800 Subject: [PATCH] modify patch and workflow --- .github/workflows/unifiedcache_test.yml | 4 ++-- ucm/patch/0.9.2/vllm-adapt-sparse.patch | 16 ++++++++-------- ucm/patch/0.9.2/vllm-ascend-adapt-sparse.patch | 8 ++++---- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/unifiedcache_test.yml b/.github/workflows/unifiedcache_test.yml index 3d1002b7..1b8a87fe 100644 --- a/.github/workflows/unifiedcache_test.yml +++ b/.github/workflows/unifiedcache_test.yml @@ -51,7 +51,7 @@ jobs: - name: Print debug info run: | echo "GITHUB_WORKSPACE is $GITHUB_WORKSPACE" - ls -l $GITHUB_WORKSPACE/unifiedcache/patch + ls -l $GITHUB_WORKSPACE/ucm/patch - name: Debug repo layout run: | @@ -60,7 +60,7 @@ jobs: ls -l "$GITHUB_WORKSPACE" - name: Apply patch run: | - git -C /vllm-workspace/vllm apply $GITHUB_WORKSPACE/unifiedcache/patch/0.9.2/vllm-adapt.patch + git -C /vllm-workspace/vllm apply $GITHUB_WORKSPACE/ucm/patch/0.9.2/vllm-adapt.patch - name: Install unified-cache-management diff --git a/ucm/patch/0.9.2/vllm-adapt-sparse.patch b/ucm/patch/0.9.2/vllm-adapt-sparse.patch index f2da8748..100304e8 100644 --- a/ucm/patch/0.9.2/vllm-adapt-sparse.patch +++ b/ucm/patch/0.9.2/vllm-adapt-sparse.patch @@ -21,7 +21,7 @@ index f0ad68b16..847c97371 100644 from vllm.platforms import _Backend, current_platform from vllm.utils import direct_register_custom_op from vllm.v1.attention.backends.utils import validate_kv_sharing_target -+from unifiedcache.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse ++from ucm.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse class Attention(nn.Module): @@ -114,8 +114,8 @@ index 6937455e7..3a44db442 100644 from vllm.v1.kv_cache_interface import KVCacheConfig from vllm.v1.metrics.stats import PrefixCacheStats from vllm.v1.request import Request, RequestStatus -+from unifiedcache.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse -+from unifiedcache.integration.vllm.ucm_sparse.base import INVALID_SLOT ++from ucm.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse ++from ucm.integration.vllm.ucm_sparse.base import INVALID_SLOT logger = init_logger(__name__) @@ -179,8 +179,8 @@ index 22c0ad8d6..c5c39a2b8 100644 from vllm.v1.spec_decode.metrics import SpecDecodingStats from vllm.v1.structured_output import StructuredOutputManager -+from unifiedcache.integration.vllm.ucm_sparse.state import ensure_ucm_sparse_initialized, get_ucm_sparse, has_ucm_sparse -+from unifiedcache.integration.vllm.ucm_sparse.base import UcmSparseBase, UcmSparseRole, INVALID_SLOT ++from ucm.integration.vllm.ucm_sparse.state import ensure_ucm_sparse_initialized, get_ucm_sparse, has_ucm_sparse ++from ucm.integration.vllm.ucm_sparse.base import UcmSparseBase, UcmSparseRole, INVALID_SLOT + logger = init_logger(__name__) @@ -313,8 +313,8 @@ index 14278bb6a..84a597b0e 100644 from .utils import (gather_mm_placeholders, initialize_kv_cache_for_kv_sharing, sanity_check_mm_encoder_outputs, scatter_mm_placeholders) -+from unifiedcache.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse -+from unifiedcache.integration.vllm.ucm_sparse.base import UcmSparseMetadata, INVALID_SLOT ++from ucm.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse ++from ucm.integration.vllm.ucm_sparse.base import UcmSparseMetadata, INVALID_SLOT + if TYPE_CHECKING: import xgrammar as xgr @@ -482,7 +482,7 @@ index 7117f60b5..c239e1f02 100644 from vllm.v1.utils import report_usage_stats from vllm.v1.worker.gpu_model_runner import GPUModelRunner from vllm.v1.worker.worker_base import WorkerBase -+from unifiedcache.integration.vllm.ucm_sparse.state import ensure_ucm_sparse_initialized ++from ucm.integration.vllm.ucm_sparse.state import ensure_ucm_sparse_initialized logger = init_logger(__name__) diff --git a/ucm/patch/0.9.2/vllm-ascend-adapt-sparse.patch b/ucm/patch/0.9.2/vllm-ascend-adapt-sparse.patch index 97889a43..4d8c9786 100644 --- a/ucm/patch/0.9.2/vllm-ascend-adapt-sparse.patch +++ b/ucm/patch/0.9.2/vllm-ascend-adapt-sparse.patch @@ -17,7 +17,7 @@ index 915feb7..0600f35 100644 from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ, aligned_16, is_310p, nd_to_nz_2d, nd_to_nz_spec) -+from unifiedcache.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse ++from ucm.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse + class AscendAttentionBackend(AttentionBackend): @@ -91,8 +91,8 @@ index f9cca93..31766b2 100644 from vllm_ascend.worker.mtp_proposer_v1 import MtpProposer from vllm_ascend.worker.npu_input_batch import CachedRequestState, InputBatch -+from unifiedcache.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse -+from unifiedcache.integration.vllm.ucm_sparse.base import UcmSparseMetadata, INVALID_SLOT ++from ucm.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse ++from ucm.integration.vllm.ucm_sparse.base import UcmSparseMetadata, INVALID_SLOT + if TYPE_CHECKING: import xgrammar as xgr # type: ignore[import-untyped] @@ -250,7 +250,7 @@ index df03d50..6ea5bf3 100644 read_kv_cache_bytes_from_file, sleep_mode_enabled, try_register_lib) from vllm_ascend.worker.model_runner_v1 import NPUModelRunner -+from unifiedcache.integration.vllm.ucm_sparse.state import ensure_ucm_sparse_initialized ++from ucm.integration.vllm.ucm_sparse.state import ensure_ucm_sparse_initialized class NPUWorker(WorkerBase):