From 95113f8a103c28c37c312a983b2222854519dcb3 Mon Sep 17 00:00:00 2001
From: qyh111
Date: Mon, 24 Nov 2025 19:45:14 -0800
Subject: [PATCH 1/7] adapt GQA & modify config.yaml

---
 examples/ucm_config_example.yaml      | 11 ++--
 ucm/integration/vllm/ucm_connector.py | 91 ++++++++++++++-------
 2 files changed, 54 insertions(+), 48 deletions(-)

diff --git a/examples/ucm_config_example.yaml b/examples/ucm_config_example.yaml
index 581db495..8d3897fd 100644
--- a/examples/ucm_config_example.yaml
+++ b/examples/ucm_config_example.yaml
@@ -8,12 +8,11 @@
 # for backward compatibility.

 # Connector name (e.g., "UcmNfsStore", "UcmDramStore")
-ucm_connector_name: "UcmNfsStore"
-
-# Connector-specific configuration
-ucm_connector_config:
-  storage_backends: "/mnt/test"
-  transferIoDirect: false
+ucm_connectors:
+  - ucm_connector_name: "UcmNfsStore"
+    ucm_connector_config:
+      storage_backends: "/mnt/test"
+      transferIoDirect: false

 load_only_first_rank: false
diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py
index 333d0930..b4d92817 100644
--- a/ucm/integration/vllm/ucm_connector.py
+++ b/ucm/integration/vllm/ucm_connector.py
@@ -58,15 +58,14 @@ class RequestHasher:

     def __init__(self):
         if RequestHasher._SEED_HASH is None:
-            RequestHasher._SEED_HASH = self._md5("UCM_HASH_SEED")
+            RequestHasher._SEED_HASH = self("UCM_HASH_SEED")

-    @staticmethod
-    def _md5(input_data) -> int:
+    def __call__(self, input_data) -> int:
         input_bytes = pickle.dumps(input_data, protocol=pickle.HIGHEST_PROTOCOL)
         md5_bytes = hashlib.md5(input_bytes).digest()
         return int.from_bytes(md5_bytes, byteorder="big")

-    def __call__(self, block_size: int, request: "Request") -> list[str]:
+    def process(self, block_size: int, request: "Request") -> list[str]:
         token_ids = request.all_token_ids

         ret = []
@@ -82,7 +81,7 @@ def __call__(self, block_size: int, request: "Request") -> list[str]:
                 parent_block_hash_value = RequestHasher._SEED_HASH

             block_token_ids_tuple = tuple(block_token_ids)
-            hash_value = self._md5((parent_block_hash_value, block_token_ids_tuple))
+            hash_value = self((parent_block_hash_value, block_token_ids_tuple))
             parent_block_hash_value = hash_value
             ret.append(str(hash_value))

@@ -114,7 +113,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
             torch_dev = torch.npu
             dev_name = "npu"
         else:
-            raise RuntimeError("Unsupported device platform for LMCache engine.")
+            raise RuntimeError("Unsupported device platform for UCMDirectConnector.")

         if self.rank >= 0:
             self.device = torch_dev.device(f"{dev_name}:{self.rank}")
@@ -139,41 +138,39 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
             self.broadcast_fn = self.group_coordinator.broadcast
             self.broadcast_stream = torch.cuda.Stream()

-        if "ucm_connector_name" in self.launch_config:
-            name = self.launch_config.get("ucm_connector_name")
-            config = self.launch_config.get("ucm_connector_config") or {}
-            config["device"] = self.rank
-            config["role"] = (
-                "scheduler" if role == KVConnectorRole.SCHEDULER else "worker"
-            )
-            element_size = vllm_config.model_config.dtype.itemsize
-            single_head_dim = vllm_config.model_config.get_head_size()
-            num_head_per_tp = vllm_config.model_config.get_num_kv_heads(
-                vllm_config.parallel_config
-            )
-            total_tp_size = vllm_config.parallel_config.tensor_parallel_size
-            num_layers = vllm_config.model_config.get_num_layers(
-                vllm_config.parallel_config
-            )
-            block_size_per_layer = self.block_size * element_size * single_head_dim
-            config["kv_block_size"] = (
-                block_size_per_layer
-                * num_layers
-                * (1 if self.is_mla else num_head_per_tp * total_tp_size * 2)
-            )
-            config["io_size"] = block_size_per_layer * (
-                1 if self.is_mla else num_head_per_tp
-            )
-            self.store = UcmConnectorFactory.create_connector(name, config)
+        connector_configs = self.launch_config.get("ucm_connectors", [])
+        assert len(connector_configs) > 0, "no storage connector name in config."
+
+        name = connector_configs[0].get("ucm_connector_name")
+        config = connector_configs[0].get("ucm_connector_config") or {}
+        config["device"] = self.rank
+        config["role"] = "scheduler" if role == KVConnectorRole.SCHEDULER else "worker"
+        element_size = vllm_config.model_config.dtype.itemsize
+        single_head_dim = vllm_config.model_config.get_head_size()
+        num_head_per_tp = vllm_config.model_config.get_num_kv_heads(
+            vllm_config.parallel_config
+        )
+        total_tp_size = vllm_config.parallel_config.tensor_parallel_size
+        num_layers = vllm_config.model_config.get_num_layers(
+            vllm_config.parallel_config
+        )
+        block_size_per_layer = self.block_size * element_size * single_head_dim
+        config["kv_block_size"] = (
+            block_size_per_layer
+            * num_layers
+            * (1 if self.is_mla else num_head_per_tp * 2)
+        )
+        config["io_size"] = block_size_per_layer * (
+            1 if self.is_mla else num_head_per_tp
+        )
+        self.store = UcmConnectorFactory.create_connector(name, config)

-            logger.info("init UCConnectorImpl, connector: %s", name)
-            logger.info(
-                "single file size = %d MB, io_size = %d KB,",
-                config["kv_block_size"] / 1024 / 1024,
-                config["io_size"] / 1024,
-            )
-        else:
-            raise TypeError(f"no storage connector name in config.")
+        logger.info("init UCConnectorImpl, connector: %s", name)
+        logger.info(
+            "single file size = %d MB, io_size = %d KB,",
+            config["kv_block_size"] / 1024 / 1024,
+            config["io_size"] / 1024,
+        )

     def get_num_new_matched_tokens(
         self,
@@ -184,7 +181,7 @@
         assert num_computed_tokens % self.block_size == 0
         hbm_hit_block_num = num_computed_tokens // self.block_size

-        ucm_block_ids = self.request_hasher(self.block_size, request)
+        ucm_block_ids = self.request_hasher.process(self.block_size, request)

         external_block_ids = ucm_block_ids[hbm_hit_block_num:]
         if not external_block_ids:
@@ -210,7 +207,7 @@
         # When all the tokens are cached in ssd or hbm,
         # we need to recompute the last token. This if condition will be removed
         # once vLLM scheduler provides a better solution in the future.
-        if external_hit_tokens == request.num_prompt_tokens:
+        if total_hit_block_num * self.block_size == request.num_tokens:
            external_hit_tokens -= 1

         self.requests_meta[request.request_id] = RequestMeta(
@@ -449,6 +446,11 @@ def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
                 continue

             ucm_block_ids, vllm_block_ids = request.load_block_ids
+            if self.rank != 0:
+                for i, ucm_block_id in enumerate(ucm_block_ids):
+                    ucm_block_ids[i] = str(
+                        self.request_hasher((ucm_block_id, self.rank))
+                    )
             ucm_total_block_ids, ucm_offsets, dst_tensor_addr = self._generate_task(
                 vllm_block_ids, ucm_block_ids
             )
@@ -495,6 +497,11 @@ def wait_for_save(self) -> None:
                 continue

             ucm_block_ids, vllm_block_ids = request.dump_block_ids
+            if self.rank != 0:
+                for i, ucm_block_id in enumerate(ucm_block_ids):
+                    ucm_block_ids[i] = str(
+                        self.request_hasher((ucm_block_id, self.rank))
+                    )
             rets = self.store.create(ucm_block_ids)
             end = 0
             for i, ret in enumerate(rets):
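Note on the sizing change in PATCH 1: with GQA the KV cache is sharded across tensor-parallel ranks, so each rank now dumps only its own num_head_per_tp heads and the old total_tp_size factor drops out of kv_block_size. A back-of-the-envelope sketch of the new formula, using hypothetical model numbers that are not taken from this repo:

    # 8 KV heads sharded over tp=4 -> 2 KV heads per rank; K and V double it.
    block_size = 16          # tokens per vLLM block
    element_size = 2         # bytes per element, e.g. bfloat16
    single_head_dim = 128
    num_head_per_tp = 2      # 8 KV heads / tp 4
    num_layers = 32

    block_size_per_layer = block_size * element_size * single_head_dim      # 4096
    kv_block_size = block_size_per_layer * num_layers * num_head_per_tp * 2  # 524288, i.e. 512 KB per rank
    io_size = block_size_per_layer * num_head_per_tp                         # 8192, i.e. 8 KB
    print(kv_block_size, io_size)

Under the old formula the same block would have carried the num_head_per_tp * total_tp_size * 2 factor, i.e. one file sized for all ranks together.
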
From 3aed1caee9ca2646288ba9819d7d3f3145bdcf92 Mon Sep 17 00:00:00 2001
From: qyh111
Date: Mon, 24 Nov 2025 21:07:36 -0800
Subject: [PATCH 2/7] move process to UCMDirectConnector

---
 ucm/integration/vllm/ucm_connector.py | 48 ++++++++++++++-------------
 1 file changed, 25 insertions(+), 23 deletions(-)

diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py
index b4d92817..863fadf1 100644
--- a/ucm/integration/vllm/ucm_connector.py
+++ b/ucm/integration/vllm/ucm_connector.py
@@ -65,28 +65,6 @@ def __call__(self, input_data) -> int:
         md5_bytes = hashlib.md5(input_bytes).digest()
         return int.from_bytes(md5_bytes, byteorder="big")

-    def process(self, block_size: int, request: "Request") -> list[str]:
-        token_ids = request.all_token_ids
-
-        ret = []
-        parent_block_hash_value = None
-        for start in range(0, len(token_ids), block_size):
-            end = start + block_size
-            block_token_ids = token_ids[start:end]
-            # Do not hash the block if it is not full.
-            if len(block_token_ids) < block_size:
-                break
-
-            if not parent_block_hash_value:
-                parent_block_hash_value = RequestHasher._SEED_HASH
-
-            block_token_ids_tuple = tuple(block_token_ids)
-            hash_value = self((parent_block_hash_value, block_token_ids_tuple))
-            parent_block_hash_value = hash_value
-            ret.append(str(hash_value))
-
-        return ret
-

 class UCMDirectConnector(KVConnectorBase_V1):
     """
@@ -172,6 +150,30 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
                 config["io_size"] / 1024,
             )

+    def generate_hash(self, block_size: int, request: "Request") -> list[str]:
+        token_ids = request.all_token_ids
+
+        ret = []
+        parent_block_hash_value = None
+        for start in range(0, len(token_ids), block_size):
+            end = start + block_size
+            block_token_ids = token_ids[start:end]
+            # Do not hash the block if it is not full.
+            if len(block_token_ids) < block_size:
+                break
+
+            if not parent_block_hash_value:
+                parent_block_hash_value = RequestHasher._SEED_HASH
+
+            block_token_ids_tuple = tuple(block_token_ids)
+            hash_value = self.request_hasher(
+                (parent_block_hash_value, block_token_ids_tuple)
+            )
+            parent_block_hash_value = hash_value
+            ret.append(str(hash_value))
+
+        return ret
+
     def get_num_new_matched_tokens(
         self,
         request: "Request",
@@ -181,7 +183,7 @@ def get_num_new_matched_tokens(
         assert num_computed_tokens % self.block_size == 0
         hbm_hit_block_num = num_computed_tokens // self.block_size

-        ucm_block_ids = self.request_hasher.process(self.block_size, request)
+        ucm_block_ids = self.generate_hash(self.block_size, request)

         external_block_ids = ucm_block_ids[hbm_hit_block_num:]
         if not external_block_ids:
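PATCH 2 only relocates the hash chain onto the connector; the chain itself is unchanged. A minimal standalone sketch of that chain, with hypothetical token streams, mirroring the logic rather than importing the repo:

    import hashlib
    import pickle

    def md5_int(data) -> int:
        raw = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)
        return int.from_bytes(hashlib.md5(raw).digest(), byteorder="big")

    SEED = md5_int("UCM_HASH_SEED")

    def hash_blocks(token_ids, block_size):
        ids, parent = [], SEED
        # Only full blocks are hashed; a trailing partial block is skipped.
        for start in range(0, len(token_ids) - block_size + 1, block_size):
            parent = md5_int((parent, tuple(token_ids[start:start + block_size])))
            ids.append(str(parent))
        return ids

    # Two requests sharing a 4-token prefix get the same first block ID.
    a = hash_blocks([1, 2, 3, 4, 5, 6, 7, 8], 4)
    b = hash_blocks([1, 2, 3, 4, 9, 9, 9, 9], 4)
    assert a[0] == b[0] and a[1] != b[1]

Because each block ID folds in its parent, a hit on block N implies hits on every block before it, which is what lets get_num_new_matched_tokens treat the IDs as a prefix lookup.
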
From c0c041e0d63c8e96b747fc2d18c46e8369c5f92d Mon Sep 17 00:00:00 2001
From: qyh111
Date: Tue, 25 Nov 2025 00:12:22 -0800
Subject: [PATCH 3/7] fix comment

---
 ucm/integration/vllm/ucm_connector.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py
index 863fadf1..701be004 100644
--- a/ucm/integration/vllm/ucm_connector.py
+++ b/ucm/integration/vllm/ucm_connector.py
@@ -56,13 +56,24 @@ class RequestHasher:

     _SEED_HASH = None

-    def __init__(self):
+    def __init__(self, vllm_config: "VllmConfig"):
+        self.model = vllm_config.model_config.model
+        self.world_size = vllm_config.parallel_config.world_size
+        self.dtype = vllm_config.model_config.dtype
+
+        meta = (self.model, self.world_size, str(self.dtype))
+        self.meta_bytes = pickle.dumps(meta, protocol=pickle.HIGHEST_PROTOCOL)
+
         if RequestHasher._SEED_HASH is None:
             RequestHasher._SEED_HASH = self("UCM_HASH_SEED")

     def __call__(self, input_data) -> int:
         input_bytes = pickle.dumps(input_data, protocol=pickle.HIGHEST_PROTOCOL)
-        md5_bytes = hashlib.md5(input_bytes).digest()
+
+        h = hashlib.md5()
+        h.update(self.meta_bytes)
+        h.update(input_bytes)
+
+        md5_bytes = h.digest()
         return int.from_bytes(md5_bytes, byteorder="big")

@@ -99,7 +110,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):

         self.store: UcmKVStoreBase

-        self.request_hasher = RequestHasher()
+        self.request_hasher = RequestHasher(vllm_config)

         # save block info, avoid hash request twice, and track them until request finished
         self.requests_meta: dict[str, RequestMeta] = {}
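PATCH 3's salt exists because a block ID is only meaningful for the exact deployment that wrote it: a different model, world size, or dtype produces differently shaped KV bytes under the same token prefix. A small sketch of the effect, with hypothetical metadata values:

    import hashlib
    import pickle

    def salted_md5(meta: tuple, data) -> int:
        h = hashlib.md5()
        h.update(pickle.dumps(meta, protocol=pickle.HIGHEST_PROTOCOL))
        h.update(pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL))
        return int.from_bytes(h.digest(), byteorder="big")

    block = (123456789, (1, 2, 3, 4))  # (parent_hash, token_tuple)
    a = salted_md5(("llama-3-8b", 1, "torch.bfloat16"), block)
    b = salted_md5(("llama-3-8b", 1, "torch.float16"), block)
    assert a != b  # a dtype change moves every ID, so stale KV can never be hit
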
From 910f34250c2603a505aecab68fa728b7d35a17cd Mon Sep 17 00:00:00 2001
From: qyh111
Date: Tue, 25 Nov 2025 19:37:48 -0800
Subject: [PATCH 4/7] modify hash function

---
 examples/ucm_config_example.yaml         |  2 +-
 ucm/integration/vllm/ucm_connector.py    | 27 +++++++++++++-----------
 ucm/store/nfsstore/nfsstore_connector.py |  2 +-
 ucm/store/pcstore/pcstore_connector.py   |  2 +-
 4 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/examples/ucm_config_example.yaml b/examples/ucm_config_example.yaml
index 8d3897fd..255e1bd0 100644
--- a/examples/ucm_config_example.yaml
+++ b/examples/ucm_config_example.yaml
@@ -11,7 +11,7 @@
 ucm_connectors:
   - ucm_connector_name: "UcmNfsStore"
     ucm_connector_config:
-      storage_backends: "/mnt/test"
+      storageBackends: "/mnt/test"
       transferIoDirect: false

 load_only_first_rank: false
diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py
index 701be004..8e2e3c04 100644
--- a/ucm/integration/vllm/ucm_connector.py
+++ b/ucm/integration/vllm/ucm_connector.py
@@ -56,25 +56,28 @@ class RequestHasher:

     _SEED_HASH = None

-    def __init__(self, vllm_config: "VllmConfig"):
+    def __init__(self, vllm_config: "VllmConfig", rank_id: int):
         self.model = vllm_config.model_config.model
         self.world_size = vllm_config.parallel_config.world_size
-        self.dtype = vllm_config.model_config.dtype
+        self.dtype = str(vllm_config.model_config.dtype)
+        self.rank = rank_id
+
+        meta = f"{self.model}:{self.world_size}:{self.dtype}:{self.rank}"
+        meta_bytes = meta.encode("utf-8")
+
+        self._prefix_md5 = hashlib.md5()
+        self._prefix_md5.update(meta_bytes)

-        meta = (self.model, self.world_size, str(self.dtype))
-        self.meta_bytes = pickle.dumps(meta, protocol=pickle.HIGHEST_PROTOCOL)
         if RequestHasher._SEED_HASH is None:
             RequestHasher._SEED_HASH = self("UCM_HASH_SEED")

     def __call__(self, input_data) -> int:
         input_bytes = pickle.dumps(input_data, protocol=pickle.HIGHEST_PROTOCOL)

-        h = hashlib.md5()
-        h.update(self.meta_bytes)
+        h = self._prefix_md5.copy()
         h.update(input_bytes)

-        md5_bytes = h.digest()
-        return int.from_bytes(md5_bytes, byteorder="big")
+        return int.from_bytes(h.digest(), byteorder="big")


 class UCMDirectConnector(KVConnectorBase_V1):
@@ -110,7 +113,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):

         self.store: UcmKVStoreBase

-        self.request_hasher = RequestHasher(vllm_config)
+        self.request_hasher = RequestHasher(vllm_config, max(0, self.rank))

         # save block info, avoid hash request twice, and track them until request finished
         self.requests_meta: dict[str, RequestMeta] = {}
@@ -459,10 +462,10 @@ def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
                 continue

             ucm_block_ids, vllm_block_ids = request.load_block_ids
-            if self.rank != 0:
+            if self.rank != 0 and not self.is_mla:
                 for i, ucm_block_id in enumerate(ucm_block_ids):
                     ucm_block_ids[i] = str(
-                        self.request_hasher((ucm_block_id, self.rank))
+                        self.request_hasher(ucm_block_id)
                     )
             ucm_total_block_ids, ucm_offsets, dst_tensor_addr = self._generate_task(
                 vllm_block_ids, ucm_block_ids
             )
@@ -513,7 +516,7 @@ def wait_for_save(self) -> None:
             ucm_block_ids, vllm_block_ids = request.dump_block_ids
             if self.rank != 0:
                 for i, ucm_block_id in enumerate(ucm_block_ids):
                     ucm_block_ids[i] = str(
-                        self.request_hasher((ucm_block_id, self.rank))
+                        self.request_hasher(ucm_block_id)
                     )
             rets = self.store.create(ucm_block_ids)
             end = 0
diff --git a/ucm/store/nfsstore/nfsstore_connector.py b/ucm/store/nfsstore/nfsstore_connector.py
index 4a348a05..914a9671 100644
--- a/ucm/store/nfsstore/nfsstore_connector.py
+++ b/ucm/store/nfsstore/nfsstore_connector.py
@@ -41,7 +41,7 @@ def __init__(self, config: Dict):
         super().__init__(config)
         self.store = ucmnfsstore.NFSStore()
         storage_backends = [
-            path for path in config["storage_backends"].split(":") if path
+            path for path in config["storageBackends"].split(":") if path
         ]
         block_size = int(config["kv_block_size"])
         transfer_enable = True if config["role"] == "worker" else False
diff --git a/ucm/store/pcstore/pcstore_connector.py b/ucm/store/pcstore/pcstore_connector.py
index e8486c3d..56b9bfe4 100644
--- a/ucm/store/pcstore/pcstore_connector.py
+++ b/ucm/store/pcstore/pcstore_connector.py
@@ -41,7 +41,7 @@ def __init__(self, config: Dict):
         super().__init__(config)
         self.store = ucmpcstore.PcStore()
         storage_backends = [
-            path for path in config["storage_backends"].split(":") if path
+            path for path in config["storageBackends"].split(":") if path
         ]
         block_size = int(config["kv_block_size"])
         transfer_enable = True if config["role"] == "worker" else False
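PATCH 4 folds the rank into the hasher's salt, so worker ranks no longer re-hash (block_id, rank) tuples; rank 0 keeps the canonical, scheduler-visible IDs and every other rank derives a disjoint ID space. The load path also skips the re-mapping for MLA, where the KV cache is replicated across TP ranks rather than sharded. A sketch of the intended mapping, using a hypothetical helper rather than repo code:

    def per_rank_ids(base_ids: list[str], hasher, rank: int, is_mla: bool) -> list[str]:
        # MLA: every rank can read rank 0's replicated blocks.
        # GQA/MHA: each rank owns different heads, so non-zero ranks
        # re-key the scheduler's IDs into their own namespace.
        if rank == 0 or is_mla:
            return base_ids
        return [str(hasher(block_id)) for block_id in base_ids]
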
"VllmConfig"): + def __init__(self, vllm_config: "VllmConfig", rank_id: int): self.model = vllm_config.model_config.model self.world_size = vllm_config.parallel_config.world_size - self.dtype = vllm_config.model_config.dtype + self.dtype = str(vllm_config.model_config.dtype) + self.rank = rank_id + + meta = f"{self.model}:{self.world_size}:{self.dtype}:{self.rank}" + meta_bytes = meta.encode("utf-8") + + self._prefix_md5 = hashlib.md5() + self._prefix_md5.update(meta_bytes) - meta = (self.model, self.world_size, str(self.dtype)) - self.meta_bytes = pickle.dumps(meta, protocol=pickle.HIGHEST_PROTOCOL) if RequestHasher._SEED_HASH is None: RequestHasher._SEED_HASH = self("UCM_HASH_SEED") def __call__(self, input_data) -> int: input_bytes = pickle.dumps(input_data, protocol=pickle.HIGHEST_PROTOCOL) - h = hashlib.md5() - h.update(self.meta_bytes) + h = self._prefix_md5.copy() h.update(input_bytes) - md5_bytes = h.digest() - return int.from_bytes(md5_bytes, byteorder="big") + return int.from_bytes(h.digest(), byteorder="big") class UCMDirectConnector(KVConnectorBase_V1): @@ -110,7 +113,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole): self.store: UcmKVStoreBase - self.request_hasher = RequestHasher(vllm_config) + self.request_hasher = RequestHasher(vllm_config, max(0, self.rank)) # save block info, avoid hash request twice, and track them until request finished self.requests_meta: dict[str, RequestMeta] = {} @@ -459,10 +462,10 @@ def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None: continue ucm_block_ids, vllm_block_ids = request.load_block_ids - if self.rank != 0: + if self.rank != 0 and not self.is_mla: for i, ucm_block_id in enumerate(ucm_block_ids): ucm_block_ids[i] = str( - self.request_hasher((ucm_block_id, self.rank)) + self.request_hasher(ucm_block_id) ) ucm_total_block_ids, ucm_offsets, dst_tensor_addr = self._generate_task( vllm_block_ids, ucm_block_ids @@ -513,7 +516,7 @@ def wait_for_save(self) -> None: if self.rank != 0: for i, ucm_block_id in enumerate(ucm_block_ids): ucm_block_ids[i] = str( - self.request_hasher((ucm_block_id, self.rank)) + self.request_hasher(ucm_block_id) ) rets = self.store.create(ucm_block_ids) end = 0 diff --git a/ucm/store/nfsstore/nfsstore_connector.py b/ucm/store/nfsstore/nfsstore_connector.py index 4a348a05..914a9671 100644 --- a/ucm/store/nfsstore/nfsstore_connector.py +++ b/ucm/store/nfsstore/nfsstore_connector.py @@ -41,7 +41,7 @@ def __init__(self, config: Dict): super().__init__(config) self.store = ucmnfsstore.NFSStore() storage_backends = [ - path for path in config["storage_backends"].split(":") if path + path for path in config["storageBackends"].split(":") if path ] block_size = int(config["kv_block_size"]) transfer_enable = True if config["role"] == "worker" else False diff --git a/ucm/store/pcstore/pcstore_connector.py b/ucm/store/pcstore/pcstore_connector.py index e8486c3d..56b9bfe4 100644 --- a/ucm/store/pcstore/pcstore_connector.py +++ b/ucm/store/pcstore/pcstore_connector.py @@ -41,7 +41,7 @@ def __init__(self, config: Dict): super().__init__(config) self.store = ucmpcstore.PcStore() storage_backends = [ - path for path in config["storage_backends"].split(":") if path + path for path in config["storageBackends"].split(":") if path ] block_size = int(config["kv_block_size"]) transfer_enable = True if config["role"] == "worker" else False From 1e1cfc60d2bfd6fb8517ab6d3f4d51561d7f58ae Mon Sep 17 00:00:00 2001 From: qyh111 Date: Tue, 25 Nov 2025 19:42:07 -0800 Subject: [PATCH 5/7] fix style --- 
From 97d93de7fc3f6a7c555cc3fd3e434bafc46c920f Mon Sep 17 00:00:00 2001
From: qyh111
Date: Wed, 26 Nov 2025 00:02:02 -0800
Subject: [PATCH 6/7] code style and modify hash

---
 examples/ucm_config_example.yaml         |  4 ++--
 ucm/integration/vllm/ucm_connector.py    | 28 ++++++++++--------------
 ucm/store/nfsstore/nfsstore_connector.py |  4 ++--
 ucm/store/pcstore/pcstore_connector.py   |  2 +-
 4 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/examples/ucm_config_example.yaml b/examples/ucm_config_example.yaml
index 255e1bd0..b7207942 100644
--- a/examples/ucm_config_example.yaml
+++ b/examples/ucm_config_example.yaml
@@ -11,8 +11,8 @@
 ucm_connectors:
   - ucm_connector_name: "UcmNfsStore"
     ucm_connector_config:
-      storageBackends: "/mnt/test"
-      transferIoDirect: false
+      storage_backends: "/mnt/test"
+      use_direct: false

 load_only_first_rank: false
diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py
index 4004de77..8e8bddac 100644
--- a/ucm/integration/vllm/ucm_connector.py
+++ b/ucm/integration/vllm/ucm_connector.py
@@ -56,27 +56,20 @@ class RequestHasher:

     _SEED_HASH = None

-    def __init__(self, vllm_config: "VllmConfig", rank_id: int):
-        self.model = vllm_config.model_config.model
-        self.world_size = vllm_config.parallel_config.world_size
-        self.dtype = str(vllm_config.model_config.dtype)
-        self.rank = rank_id
-
-        meta = f"{self.model}:{self.world_size}:{self.dtype}:{self.rank}"
-        meta_bytes = meta.encode("utf-8")
-
-        self._prefix_md5 = hashlib.md5()
-        self._prefix_md5.update(meta_bytes)
+    def __init__(self, vllm_config, rank_id):
+        meta = f"{vllm_config.model_config.model}:{vllm_config.parallel_config.world_size}:{vllm_config.model_config.dtype}:{rank_id}"
+        self.meta_bytes = meta.encode("utf-8")

         if RequestHasher._SEED_HASH is None:
             RequestHasher._SEED_HASH = self("UCM_HASH_SEED")

     def __call__(self, input_data) -> int:
-        input_bytes = pickle.dumps(input_data, protocol=pickle.HIGHEST_PROTOCOL)
-
-        h = self._prefix_md5.copy()
-        h.update(input_bytes)
+        if isinstance(input_data, str):
+            input_bytes = input_data.encode("utf-8")
+        else:
+            input_bytes = pickle.dumps(input_data, protocol=pickle.HIGHEST_PROTOCOL)

+        h = hashlib.md5(self.meta_bytes + input_bytes)
         return int.from_bytes(h.digest(), byteorder="big")


 class UCMDirectConnector(KVConnectorBase_V1):
@@ -106,7 +99,10 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):

         self.store: UcmKVStoreBase

-        self.request_hasher = RequestHasher(vllm_config, max(0, self.rank))
+        if role == KVConnectorRole.SCHEDULER:
+            self.request_hasher = RequestHasher(vllm_config, 0)
+        else:
+            self.request_hasher = RequestHasher(vllm_config, self.rank)

         # save block info, avoid hash request twice, and track them until request finished
         self.requests_meta: dict[str, RequestMeta] = {}
diff --git a/ucm/store/nfsstore/nfsstore_connector.py b/ucm/store/nfsstore/nfsstore_connector.py
index 914a9671..bd30f628 100644
--- a/ucm/store/nfsstore/nfsstore_connector.py
+++ b/ucm/store/nfsstore/nfsstore_connector.py
@@ -41,7 +41,7 @@ def __init__(self, config: Dict):
         super().__init__(config)
         self.store = ucmnfsstore.NFSStore()
         storage_backends = [
-            path for path in config["storageBackends"].split(":") if path
+            path for path in config["storage_backends"].split(":") if path
         ]
         block_size = int(config["kv_block_size"])
         transfer_enable = True if config["role"] == "worker" else False
@@ -51,7 +51,7 @@ def __init__(self, config: Dict):
         if transfer_enable:
             param.transferDeviceId = config["device"]
             param.transferIoSize = config["io_size"]
-            param.transferIoDirect = config.get("transferIoDirect", False)
+            param.transferIoDirect = config.get("use_direct", False)

         # NOTE: compatible with legacy nfsstore lib
         if hasattr(param, "storageCapacity"):
diff --git a/ucm/store/pcstore/pcstore_connector.py b/ucm/store/pcstore/pcstore_connector.py
index 56b9bfe4..e8486c3d 100644
--- a/ucm/store/pcstore/pcstore_connector.py
+++ b/ucm/store/pcstore/pcstore_connector.py
@@ -41,7 +41,7 @@ def __init__(self, config: Dict):
         super().__init__(config)
         self.store = ucmpcstore.PcStore()
         storage_backends = [
-            path for path in config["storageBackends"].split(":") if path
+            path for path in config["storage_backends"].split(":") if path
        ]
         block_size = int(config["kv_block_size"])
         transfer_enable = True if config["role"] == "worker" else False
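PATCH 6 collapses the hasher to a single md5 over meta_bytes + input_bytes and special-cases strings, so re-hashed block IDs (which are decimal strings) hash identically across processes without a pickle round-trip, and the scheduler always salts with rank 0 so its IDs line up with worker rank 0's. A condensed sketch of the final scheme, with hypothetical metadata values:

    import hashlib
    import pickle

    meta_bytes = "llama-3-8b:4:torch.bfloat16:0".encode("utf-8")  # model:world_size:dtype:rank

    def ucm_hash(input_data) -> int:
        if isinstance(input_data, str):
            input_bytes = input_data.encode("utf-8")
        else:
            input_bytes = pickle.dumps(input_data, protocol=pickle.HIGHEST_PROTOCOL)
        return int.from_bytes(hashlib.md5(meta_bytes + input_bytes).digest(), "big")

    seed = ucm_hash("UCM_HASH_SEED")
    first_block_id = str(ucm_hash((seed, (1, 2, 3, 4))))  # what the scheduler stores
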
From ed54c5ca21e188efd39567e3f78a7bd645b65529 Mon Sep 17 00:00:00 2001
From: qyh111
Date: Wed, 26 Nov 2025 01:07:16 -0800
Subject: [PATCH 7/7] init parent_block_hash_value

---
 ucm/integration/vllm/ucm_connector.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py
index 8e8bddac..8bec088f 100644
--- a/ucm/integration/vllm/ucm_connector.py
+++ b/ucm/integration/vllm/ucm_connector.py
@@ -164,17 +164,14 @@ def generate_hash(self, block_size: int, request: "Request") -> list[str]:
         token_ids = request.all_token_ids

         ret = []
-        parent_block_hash_value = None
+        parent_block_hash_value = RequestHasher._SEED_HASH
         for start in range(0, len(token_ids), block_size):
             end = start + block_size
             block_token_ids = token_ids[start:end]
             # Do not hash the block if it is not full.
             if len(block_token_ids) < block_size:
                 break

-            if not parent_block_hash_value:
-                parent_block_hash_value = RequestHasher._SEED_HASH
-
             block_token_ids_tuple = tuple(block_token_ids)
             hash_value = self.request_hasher(
                 (parent_block_hash_value, block_token_ids_tuple)
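A side note on PATCH 7: seeding parent_block_hash_value with RequestHasher._SEED_HASH up front is equivalent to the old lazy initialization, except in the astronomically unlikely case where a block hash evaluates to 0; the old "if not parent_block_hash_value" check would have silently re-seeded the chain at that point, while the new form chains through it correctly.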