Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/unifiedcache_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
- name: Print debug info
run: |
echo "GITHUB_WORKSPACE is $GITHUB_WORKSPACE"
ls -l $GITHUB_WORKSPACE/unifiedcache/patch
ls -l $GITHUB_WORKSPACE/ucm/patch

- name: Debug repo layout
run: |
Expand All @@ -60,7 +60,7 @@ jobs:
ls -l "$GITHUB_WORKSPACE"
- name: Apply patch
run: |
git -C /vllm-workspace/vllm apply $GITHUB_WORKSPACE/unifiedcache/patch/0.9.2/vllm-adapt.patch
git -C /vllm-workspace/vllm apply $GITHUB_WORKSPACE/ucm/patch/0.9.2/vllm-adapt.patch


- name: Install unified-cache-management
Expand Down
16 changes: 8 additions & 8 deletions ucm/patch/0.9.2/vllm-adapt-sparse.patch
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ index f0ad68b16..847c97371 100644
from vllm.platforms import _Backend, current_platform
from vllm.utils import direct_register_custom_op
from vllm.v1.attention.backends.utils import validate_kv_sharing_target
+from unifiedcache.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse
+from ucm.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse


class Attention(nn.Module):
Expand Down Expand Up @@ -114,8 +114,8 @@ index 6937455e7..3a44db442 100644
from vllm.v1.kv_cache_interface import KVCacheConfig
from vllm.v1.metrics.stats import PrefixCacheStats
from vllm.v1.request import Request, RequestStatus
+from unifiedcache.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse
+from unifiedcache.integration.vllm.ucm_sparse.base import INVALID_SLOT
+from ucm.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse
+from ucm.integration.vllm.ucm_sparse.base import INVALID_SLOT

logger = init_logger(__name__)

Expand Down Expand Up @@ -179,8 +179,8 @@ index 22c0ad8d6..c5c39a2b8 100644
from vllm.v1.spec_decode.metrics import SpecDecodingStats
from vllm.v1.structured_output import StructuredOutputManager

+from unifiedcache.integration.vllm.ucm_sparse.state import ensure_ucm_sparse_initialized, get_ucm_sparse, has_ucm_sparse
+from unifiedcache.integration.vllm.ucm_sparse.base import UcmSparseBase, UcmSparseRole, INVALID_SLOT
+from ucm.integration.vllm.ucm_sparse.state import ensure_ucm_sparse_initialized, get_ucm_sparse, has_ucm_sparse
+from ucm.integration.vllm.ucm_sparse.base import UcmSparseBase, UcmSparseRole, INVALID_SLOT
+
logger = init_logger(__name__)

Expand Down Expand Up @@ -313,8 +313,8 @@ index 14278bb6a..84a597b0e 100644
from .utils import (gather_mm_placeholders, initialize_kv_cache_for_kv_sharing,
sanity_check_mm_encoder_outputs, scatter_mm_placeholders)

+from unifiedcache.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse
+from unifiedcache.integration.vllm.ucm_sparse.base import UcmSparseMetadata, INVALID_SLOT
+from ucm.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse
+from ucm.integration.vllm.ucm_sparse.base import UcmSparseMetadata, INVALID_SLOT
+
if TYPE_CHECKING:
import xgrammar as xgr
Expand Down Expand Up @@ -482,7 +482,7 @@ index 7117f60b5..c239e1f02 100644
from vllm.v1.utils import report_usage_stats
from vllm.v1.worker.gpu_model_runner import GPUModelRunner
from vllm.v1.worker.worker_base import WorkerBase
+from unifiedcache.integration.vllm.ucm_sparse.state import ensure_ucm_sparse_initialized
+from ucm.integration.vllm.ucm_sparse.state import ensure_ucm_sparse_initialized

logger = init_logger(__name__)

Expand Down
8 changes: 4 additions & 4 deletions ucm/patch/0.9.2/vllm-ascend-adapt-sparse.patch
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ index 915feb7..0600f35 100644
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ, aligned_16, is_310p,
nd_to_nz_2d, nd_to_nz_spec)

+from unifiedcache.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse
+from ucm.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse
+

class AscendAttentionBackend(AttentionBackend):
Expand Down Expand Up @@ -91,8 +91,8 @@ index f9cca93..31766b2 100644
from vllm_ascend.worker.mtp_proposer_v1 import MtpProposer
from vllm_ascend.worker.npu_input_batch import CachedRequestState, InputBatch

+from unifiedcache.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse
+from unifiedcache.integration.vllm.ucm_sparse.base import UcmSparseMetadata, INVALID_SLOT
+from ucm.integration.vllm.ucm_sparse.state import get_ucm_sparse, has_ucm_sparse
+from ucm.integration.vllm.ucm_sparse.base import UcmSparseMetadata, INVALID_SLOT
+
if TYPE_CHECKING:
import xgrammar as xgr # type: ignore[import-untyped]
Expand Down Expand Up @@ -250,7 +250,7 @@ index df03d50..6ea5bf3 100644
read_kv_cache_bytes_from_file,
sleep_mode_enabled, try_register_lib)
from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
+from unifiedcache.integration.vllm.ucm_sparse.state import ensure_ucm_sparse_initialized
+from ucm.integration.vllm.ucm_sparse.state import ensure_ucm_sparse_initialized


class NPUWorker(WorkerBase):
Expand Down