From 95113f8a103c28c37c312a983b2222854519dcb3 Mon Sep 17 00:00:00 2001
From: qyh111
Date: Mon, 24 Nov 2025 19:45:14 -0800
Subject: [PATCH 1/7] adapt GQA & modify config.yaml

---
 examples/ucm_config_example.yaml      | 11 ++--
 ucm/integration/vllm/ucm_connector.py | 91 ++++++++++++++-------
 2 files changed, 54 insertions(+), 48 deletions(-)

diff --git a/examples/ucm_config_example.yaml b/examples/ucm_config_example.yaml
index 581db495..8d3897fd 100644
--- a/examples/ucm_config_example.yaml
+++ b/examples/ucm_config_example.yaml
@@ -8,12 +8,11 @@
 # for backward compatibility.

 # Connector name (e.g., "UcmNfsStore", "UcmDramStore")
-ucm_connector_name: "UcmNfsStore"
-
-# Connector-specific configuration
-ucm_connector_config:
-  storage_backends: "/mnt/test"
-  transferIoDirect: false
+ucm_connectors:
+  - ucm_connector_name: "UcmNfsStore"
+    ucm_connector_config:
+      storage_backends: "/mnt/test"
+      transferIoDirect: false

 load_only_first_rank: false
diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py
index 333d0930..b4d92817 100644
--- a/ucm/integration/vllm/ucm_connector.py
+++ b/ucm/integration/vllm/ucm_connector.py
@@ -58,15 +58,14 @@ class RequestHasher:

     def __init__(self):
         if RequestHasher._SEED_HASH is None:
-            RequestHasher._SEED_HASH = self._md5("UCM_HASH_SEED")
+            RequestHasher._SEED_HASH = self("UCM_HASH_SEED")

-    @staticmethod
-    def _md5(input_data) -> int:
+    def __call__(self, input_data) -> int:
         input_bytes = pickle.dumps(input_data, protocol=pickle.HIGHEST_PROTOCOL)
         md5_bytes = hashlib.md5(input_bytes).digest()
         return int.from_bytes(md5_bytes, byteorder="big")

-    def __call__(self, block_size: int, request: "Request") -> list[str]:
+    def process(self, block_size: int, request: "Request") -> list[str]:
         token_ids = request.all_token_ids

         ret = []
@@ -82,7 +81,7 @@ def __call__(self, block_size: int, request: "Request") -> list[str]:
                 parent_block_hash_value = RequestHasher._SEED_HASH

             block_token_ids_tuple = tuple(block_token_ids)
-            hash_value = self._md5((parent_block_hash_value, block_token_ids_tuple))
+            hash_value = self((parent_block_hash_value, block_token_ids_tuple))
             parent_block_hash_value = hash_value
             ret.append(str(hash_value))

@@ -114,7 +113,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
             torch_dev = torch.npu
             dev_name = "npu"
         else:
-            raise RuntimeError("Unsupported device platform for LMCache engine.")
+            raise RuntimeError("Unsupported device platform for UCMDirectConnector.")

         if self.rank >= 0:
             self.device = torch_dev.device(f"{dev_name}:{self.rank}")
@@ -139,41 +138,39 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
             self.broadcast_fn = self.group_coordinator.broadcast
             self.broadcast_stream = torch.cuda.Stream()

-        if "ucm_connector_name" in self.launch_config:
-            name = self.launch_config.get("ucm_connector_name")
-            config = self.launch_config.get("ucm_connector_config") or {}
-            config["device"] = self.rank
-            config["role"] = (
-                "scheduler" if role == KVConnectorRole.SCHEDULER else "worker"
-            )
-            element_size = vllm_config.model_config.dtype.itemsize
-            single_head_dim = vllm_config.model_config.get_head_size()
-            num_head_per_tp = vllm_config.model_config.get_num_kv_heads(
-                vllm_config.parallel_config
-            )
-            total_tp_size = vllm_config.parallel_config.tensor_parallel_size
-            num_layers = vllm_config.model_config.get_num_layers(
-                vllm_config.parallel_config
-            )
-            block_size_per_layer = self.block_size * element_size * single_head_dim
-            config["kv_block_size"] = (
-                block_size_per_layer
-                * num_layers
-                * (1 if self.is_mla else num_head_per_tp * total_tp_size * 2)
-            )
-            config["io_size"] = block_size_per_layer * (
-                1 if self.is_mla else num_head_per_tp
-            )
-            self.store = UcmConnectorFactory.create_connector(name, config)
+        connector_configs = self.launch_config.get("ucm_connectors", [])
+        assert len(connector_configs) > 0, "no storage connector name in config."
+
+        name = connector_configs[0].get("ucm_connector_name")
+        config = connector_configs[0].get("ucm_connector_config") or {}
+        config["device"] = self.rank
+        config["role"] = "scheduler" if role == KVConnectorRole.SCHEDULER else "worker"
+        element_size = vllm_config.model_config.dtype.itemsize
+        single_head_dim = vllm_config.model_config.get_head_size()
+        num_head_per_tp = vllm_config.model_config.get_num_kv_heads(
+            vllm_config.parallel_config
+        )
+        total_tp_size = vllm_config.parallel_config.tensor_parallel_size
+        num_layers = vllm_config.model_config.get_num_layers(
+            vllm_config.parallel_config
+        )
+        block_size_per_layer = self.block_size * element_size * single_head_dim
+        config["kv_block_size"] = (
+            block_size_per_layer
+            * num_layers
+            * (1 if self.is_mla else num_head_per_tp * 2)
+        )
+        config["io_size"] = block_size_per_layer * (
+            1 if self.is_mla else num_head_per_tp
+        )
+        self.store = UcmConnectorFactory.create_connector(name, config)

-            logger.info("init UCConnectorImpl, connector: %s", name)
-            logger.info(
-                "single file size = %d MB, io_size = %d KB,",
-                config["kv_block_size"] / 1024 / 1024,
-                config["io_size"] / 1024,
-            )
-        else:
-            raise TypeError(f"no storage connector name in config.")
+        logger.info("init UCConnectorImpl, connector: %s", name)
+        logger.info(
+            "single file size = %d MB, io_size = %d KB,",
+            config["kv_block_size"] / 1024 / 1024,
+            config["io_size"] / 1024,
+        )

     def get_num_new_matched_tokens(
         self,
@@ -184,7 +181,7 @@
         assert num_computed_tokens % self.block_size == 0
         hbm_hit_block_num = num_computed_tokens // self.block_size

-        ucm_block_ids = self.request_hasher(self.block_size, request)
+        ucm_block_ids = self.request_hasher.process(self.block_size, request)

         external_block_ids = ucm_block_ids[hbm_hit_block_num:]
         if not external_block_ids:
@@ -210,7 +207,7 @@
         # When all the tokens are cached in ssd or hbm,
         # we need to recompute the last token. This if condition will be removed
         # once vLLM scheduler provides a better solution in the future.
-        if external_hit_tokens == request.num_prompt_tokens:
+        if total_hit_block_num * self.block_size == request.num_tokens:
            external_hit_tokens -= 1

         self.requests_meta[request.request_id] = RequestMeta(
@@ -449,6 +446,11 @@ def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
                 continue

             ucm_block_ids, vllm_block_ids = request.load_block_ids
+            if self.rank != 0:
+                for i, ucm_block_id in enumerate(ucm_block_ids):
+                    ucm_block_ids[i] = str(
+                        self.request_hasher((ucm_block_id, self.rank))
+                    )
             ucm_total_block_ids, ucm_offsets, dst_tensor_addr = self._generate_task(
                 vllm_block_ids, ucm_block_ids
             )
@@ -495,6 +497,11 @@ def wait_for_save(self) -> None:
                 continue

             ucm_block_ids, vllm_block_ids = request.dump_block_ids
+            if self.rank != 0:
+                for i, ucm_block_id in enumerate(ucm_block_ids):
+                    ucm_block_ids[i] = str(
+                        self.request_hasher((ucm_block_id, self.rank))
+                    )
             rets = self.store.create(ucm_block_ids)
             end = 0
             for i, ret in enumerate(rets):
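Note on the sizing change in PATCH 1: with GQA the KV cache is sharded across tensor-parallel ranks, so each rank now dumps only its own num_head_per_tp heads and the old total_tp_size factor drops out of kv_block_size. A back-of-the-envelope sketch of the new formula, using hypothetical model numbers that are not taken from this repo:

    # 8 KV heads sharded over tp=4 -> 2 KV heads per rank; K and V double it.
    block_size = 16          # tokens per vLLM block
    element_size = 2         # bytes per element, e.g. bfloat16
    single_head_dim = 128
    num_head_per_tp = 2      # 8 KV heads / tp 4
    num_layers = 32

    block_size_per_layer = block_size * element_size * single_head_dim      # 4096
    kv_block_size = block_size_per_layer * num_layers * num_head_per_tp * 2  # 524288, i.e. 512 KB per rank
    io_size = block_size_per_layer * num_head_per_tp                         # 8192, i.e. 8 KB
    print(kv_block_size, io_size)

Under the old formula the same block would have carried the num_head_per_tp * total_tp_size * 2 factor, i.e. one file sized for all ranks together.
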
From 3aed1caee9ca2646288ba9819d7d3f3145bdcf92 Mon Sep 17 00:00:00 2001
From: qyh111
Date: Mon, 24 Nov 2025 21:07:36 -0800
Subject: [PATCH 2/7] move process to UCMDirectConnector

---
 ucm/integration/vllm/ucm_connector.py | 48 ++++++++++++++-------------
 1 file changed, 25 insertions(+), 23 deletions(-)

diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py
index b4d92817..863fadf1 100644
--- a/ucm/integration/vllm/ucm_connector.py
+++ b/ucm/integration/vllm/ucm_connector.py
@@ -65,28 +65,6 @@ def __call__(self, input_data) -> int:
         md5_bytes = hashlib.md5(input_bytes).digest()
         return int.from_bytes(md5_bytes, byteorder="big")

-    def process(self, block_size: int, request: "Request") -> list[str]:
-        token_ids = request.all_token_ids
-
-        ret = []
-        parent_block_hash_value = None
-        for start in range(0, len(token_ids), block_size):
-            end = start + block_size
-            block_token_ids = token_ids[start:end]
-            # Do not hash the block if it is not full.
-            if len(block_token_ids) < block_size:
-                break
-
-            if not parent_block_hash_value:
-                parent_block_hash_value = RequestHasher._SEED_HASH
-
-            block_token_ids_tuple = tuple(block_token_ids)
-            hash_value = self((parent_block_hash_value, block_token_ids_tuple))
-            parent_block_hash_value = hash_value
-            ret.append(str(hash_value))
-
-        return ret
-

 class UCMDirectConnector(KVConnectorBase_V1):
     """
@@ -172,6 +150,30 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
                 config["io_size"] / 1024,
             )

+    def generate_hash(self, block_size: int, request: "Request") -> list[str]:
+        token_ids = request.all_token_ids
+
+        ret = []
+        parent_block_hash_value = None
+        for start in range(0, len(token_ids), block_size):
+            end = start + block_size
+            block_token_ids = token_ids[start:end]
+            # Do not hash the block if it is not full.
+            if len(block_token_ids) < block_size:
+                break
+
+            if not parent_block_hash_value:
+                parent_block_hash_value = RequestHasher._SEED_HASH
+
+            block_token_ids_tuple = tuple(block_token_ids)
+            hash_value = self.request_hasher(
+                (parent_block_hash_value, block_token_ids_tuple)
+            )
+            parent_block_hash_value = hash_value
+            ret.append(str(hash_value))
+
+        return ret
+
     def get_num_new_matched_tokens(
         self,
         request: "Request",
@@ -181,7 +183,7 @@ def get_num_new_matched_tokens(
         assert num_computed_tokens % self.block_size == 0
         hbm_hit_block_num = num_computed_tokens // self.block_size

-        ucm_block_ids = self.request_hasher.process(self.block_size, request)
+        ucm_block_ids = self.generate_hash(self.block_size, request)

         external_block_ids = ucm_block_ids[hbm_hit_block_num:]
         if not external_block_ids:
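PATCH 2 only relocates the hash chain onto the connector; the chain itself is unchanged. A minimal standalone sketch of that chain, with hypothetical token streams, mirroring the logic rather than importing the repo:

    import hashlib
    import pickle

    def md5_int(data) -> int:
        raw = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)
        return int.from_bytes(hashlib.md5(raw).digest(), byteorder="big")

    SEED = md5_int("UCM_HASH_SEED")

    def hash_blocks(token_ids, block_size):
        ids, parent = [], SEED
        # Only full blocks are hashed; a trailing partial block is skipped.
        for start in range(0, len(token_ids) - block_size + 1, block_size):
            parent = md5_int((parent, tuple(token_ids[start:start + block_size])))
            ids.append(str(parent))
        return ids

    # Two requests sharing a 4-token prefix get the same first block ID.
    a = hash_blocks([1, 2, 3, 4, 5, 6, 7, 8], 4)
    b = hash_blocks([1, 2, 3, 4, 9, 9, 9, 9], 4)
    assert a[0] == b[0] and a[1] != b[1]

Because each block ID folds in its parent, a hit on block N implies hits on every block before it, which is what lets get_num_new_matched_tokens treat the IDs as a prefix lookup.
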
From c0c041e0d63c8e96b747fc2d18c46e8369c5f92d Mon Sep 17 00:00:00 2001
From: qyh111
Date: Tue, 25 Nov 2025 00:12:22 -0800
Subject: [PATCH 3/7] fix comment

---
 ucm/integration/vllm/ucm_connector.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py
index 863fadf1..701be004 100644
--- a/ucm/integration/vllm/ucm_connector.py
+++ b/ucm/integration/vllm/ucm_connector.py
@@ -56,13 +56,24 @@ class RequestHasher:

     _SEED_HASH = None

-    def __init__(self):
+    def __init__(self, vllm_config: "VllmConfig"):
+        self.model = vllm_config.model_config.model
+        self.world_size = vllm_config.parallel_config.world_size
+        self.dtype = vllm_config.model_config.dtype
+
+        meta = (self.model, self.world_size, str(self.dtype))
+        self.meta_bytes = pickle.dumps(meta, protocol=pickle.HIGHEST_PROTOCOL)
+
         if RequestHasher._SEED_HASH is None:
             RequestHasher._SEED_HASH = self("UCM_HASH_SEED")

     def __call__(self, input_data) -> int:
         input_bytes = pickle.dumps(input_data, protocol=pickle.HIGHEST_PROTOCOL)
-        md5_bytes = hashlib.md5(input_bytes).digest()
+
+        h = hashlib.md5()
+        h.update(self.meta_bytes)
+        h.update(input_bytes)
+
+        md5_bytes = h.digest()
         return int.from_bytes(md5_bytes, byteorder="big")

@@ -99,7 +110,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):

         self.store: UcmKVStoreBase

-        self.request_hasher = RequestHasher()
+        self.request_hasher = RequestHasher(vllm_config)

         # save block info, avoid hash request twice, and track them until request finished
         self.requests_meta: dict[str, RequestMeta] = {}
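PATCH 3's salt exists because a block ID is only meaningful for the exact deployment that wrote it: a different model, world size, or dtype produces differently shaped KV bytes under the same token prefix. A small sketch of the effect, with hypothetical metadata values:

    import hashlib
    import pickle

    def salted_md5(meta: tuple, data) -> int:
        h = hashlib.md5()
        h.update(pickle.dumps(meta, protocol=pickle.HIGHEST_PROTOCOL))
        h.update(pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL))
        return int.from_bytes(h.digest(), byteorder="big")

    block = (123456789, (1, 2, 3, 4))  # (parent_hash, token_tuple)
    a = salted_md5(("llama-3-8b", 1, "torch.bfloat16"), block)
    b = salted_md5(("llama-3-8b", 1, "torch.float16"), block)
    assert a != b  # a dtype change moves every ID, so stale KV can never be hit
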
From 910f34250c2603a505aecab68fa728b7d35a17cd Mon Sep 17 00:00:00 2001
From: qyh111
Date: Tue, 25 Nov 2025 19:37:48 -0800
Subject: [PATCH 4/7] modify hash function

---
 examples/ucm_config_example.yaml         |  2 +-
 ucm/integration/vllm/ucm_connector.py    | 27 +++++++++++++-----------
 ucm/store/nfsstore/nfsstore_connector.py |  2 +-
 ucm/store/pcstore/pcstore_connector.py   |  2 +-
 4 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/examples/ucm_config_example.yaml b/examples/ucm_config_example.yaml
index 8d3897fd..255e1bd0 100644
--- a/examples/ucm_config_example.yaml
+++ b/examples/ucm_config_example.yaml
@@ -11,7 +11,7 @@
 ucm_connectors:
   - ucm_connector_name: "UcmNfsStore"
     ucm_connector_config:
-      storage_backends: "/mnt/test"
+      storageBackends: "/mnt/test"
       transferIoDirect: false

 load_only_first_rank: false
diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py
index 701be004..8e2e3c04 100644
--- a/ucm/integration/vllm/ucm_connector.py
+++ b/ucm/integration/vllm/ucm_connector.py
@@ -56,25 +56,28 @@ class RequestHasher:

     _SEED_HASH = None

-    def __init__(self, vllm_config: "VllmConfig"):
+    def __init__(self, vllm_config: "VllmConfig", rank_id: int):
         self.model = vllm_config.model_config.model
         self.world_size = vllm_config.parallel_config.world_size
-        self.dtype = vllm_config.model_config.dtype
+        self.dtype = str(vllm_config.model_config.dtype)
+        self.rank = rank_id
+
+        meta = f"{self.model}:{self.world_size}:{self.dtype}:{self.rank}"
+        meta_bytes = meta.encode("utf-8")
+
+        self._prefix_md5 = hashlib.md5()
+        self._prefix_md5.update(meta_bytes)

-        meta = (self.model, self.world_size, str(self.dtype))
-        self.meta_bytes = pickle.dumps(meta, protocol=pickle.HIGHEST_PROTOCOL)
         if RequestHasher._SEED_HASH is None:
             RequestHasher._SEED_HASH = self("UCM_HASH_SEED")

     def __call__(self, input_data) -> int:
         input_bytes = pickle.dumps(input_data, protocol=pickle.HIGHEST_PROTOCOL)

-        h = hashlib.md5()
-        h.update(self.meta_bytes)
+        h = self._prefix_md5.copy()
         h.update(input_bytes)

-        md5_bytes = h.digest()
-        return int.from_bytes(md5_bytes, byteorder="big")
+        return int.from_bytes(h.digest(), byteorder="big")


 class UCMDirectConnector(KVConnectorBase_V1):
@@ -110,7 +113,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):

         self.store: UcmKVStoreBase

-        self.request_hasher = RequestHasher(vllm_config)
+        self.request_hasher = RequestHasher(vllm_config, max(0, self.rank))

         # save block info, avoid hash request twice, and track them until request finished
         self.requests_meta: dict[str, RequestMeta] = {}
@@ -459,10 +462,10 @@ def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
                 continue

             ucm_block_ids, vllm_block_ids = request.load_block_ids
-            if self.rank != 0:
+            if self.rank != 0 and not self.is_mla:
                 for i, ucm_block_id in enumerate(ucm_block_ids):
                     ucm_block_ids[i] = str(
-                        self.request_hasher((ucm_block_id, self.rank))
+                        self.request_hasher(ucm_block_id)
                     )
             ucm_total_block_ids, ucm_offsets, dst_tensor_addr = self._generate_task(
                 vllm_block_ids, ucm_block_ids
             )
@@ -513,7 +516,7 @@ def wait_for_save(self) -> None:
             ucm_block_ids, vllm_block_ids = request.dump_block_ids
             if self.rank != 0:
                 for i, ucm_block_id in enumerate(ucm_block_ids):
                     ucm_block_ids[i] = str(
-                        self.request_hasher((ucm_block_id, self.rank))
+                        self.request_hasher(ucm_block_id)
                     )
             rets = self.store.create(ucm_block_ids)
             end = 0
diff --git a/ucm/store/nfsstore/nfsstore_connector.py b/ucm/store/nfsstore/nfsstore_connector.py
index 4a348a05..914a9671 100644
--- a/ucm/store/nfsstore/nfsstore_connector.py
+++ b/ucm/store/nfsstore/nfsstore_connector.py
@@ -41,7 +41,7 @@ def __init__(self, config: Dict):
         super().__init__(config)
         self.store = ucmnfsstore.NFSStore()
         storage_backends = [
-            path for path in config["storage_backends"].split(":") if path
+            path for path in config["storageBackends"].split(":") if path
         ]
         block_size = int(config["kv_block_size"])
         transfer_enable = True if config["role"] == "worker" else False
diff --git a/ucm/store/pcstore/pcstore_connector.py b/ucm/store/pcstore/pcstore_connector.py
index e8486c3d..56b9bfe4 100644
--- a/ucm/store/pcstore/pcstore_connector.py
+++ b/ucm/store/pcstore/pcstore_connector.py
@@ -41,7 +41,7 @@ def __init__(self, config: Dict):
         super().__init__(config)
         self.store = ucmpcstore.PcStore()
         storage_backends = [
-            path for path in config["storage_backends"].split(":") if path
+            path for path in config["storageBackends"].split(":") if path
         ]
         block_size = int(config["kv_block_size"])
         transfer_enable = True if config["role"] == "worker" else False
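PATCH 4 folds the rank into the hasher's salt, so worker ranks no longer re-hash (block_id, rank) tuples; rank 0 keeps the canonical, scheduler-visible IDs and every other rank derives a disjoint ID space. The load path also skips the re-mapping for MLA, where the KV cache is replicated across TP ranks rather than sharded. A sketch of the intended mapping, using a hypothetical helper rather than repo code:

    def per_rank_ids(base_ids: list[str], hasher, rank: int, is_mla: bool) -> list[str]:
        # MLA: every rank can read rank 0's replicated blocks.
        # GQA/MHA: each rank owns different heads, so non-zero ranks
        # re-key the scheduler's IDs into their own namespace.
        if rank == 0 or is_mla:
            return base_ids
        return [str(hasher(block_id)) for block_id in base_ids]
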
"VllmConfig"): + def __init__(self, vllm_config: "VllmConfig", rank_id: int): self.model = vllm_config.model_config.model self.world_size = vllm_config.parallel_config.world_size - self.dtype = vllm_config.model_config.dtype + self.dtype = str(vllm_config.model_config.dtype) + self.rank = rank_id + + meta = f"{self.model}:{self.world_size}:{self.dtype}:{self.rank}" + meta_bytes = meta.encode("utf-8") + + self._prefix_md5 = hashlib.md5() + self._prefix_md5.update(meta_bytes) - meta = (self.model, self.world_size, str(self.dtype)) - self.meta_bytes = pickle.dumps(meta, protocol=pickle.HIGHEST_PROTOCOL) if RequestHasher._SEED_HASH is None: RequestHasher._SEED_HASH = self("UCM_HASH_SEED") def __call__(self, input_data) -> int: input_bytes = pickle.dumps(input_data, protocol=pickle.HIGHEST_PROTOCOL) - h = hashlib.md5() - h.update(self.meta_bytes) + h = self._prefix_md5.copy() h.update(input_bytes) - md5_bytes = h.digest() - return int.from_bytes(md5_bytes, byteorder="big") + return int.from_bytes(h.digest(), byteorder="big") class UCMDirectConnector(KVConnectorBase_V1): @@ -110,7 +113,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole): self.store: UcmKVStoreBase - self.request_hasher = RequestHasher(vllm_config) + self.request_hasher = RequestHasher(vllm_config, max(0, self.rank)) # save block info, avoid hash request twice, and track them until request finished self.requests_meta: dict[str, RequestMeta] = {} @@ -459,10 +462,10 @@ def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None: continue ucm_block_ids, vllm_block_ids = request.load_block_ids - if self.rank != 0: + if self.rank != 0 and not self.is_mla: for i, ucm_block_id in enumerate(ucm_block_ids): ucm_block_ids[i] = str( - self.request_hasher((ucm_block_id, self.rank)) + self.request_hasher(ucm_block_id) ) ucm_total_block_ids, ucm_offsets, dst_tensor_addr = self._generate_task( vllm_block_ids, ucm_block_ids @@ -513,7 +516,7 @@ def wait_for_save(self) -> None: if self.rank != 0: for i, ucm_block_id in enumerate(ucm_block_ids): ucm_block_ids[i] = str( - self.request_hasher((ucm_block_id, self.rank)) + self.request_hasher(ucm_block_id) ) rets = self.store.create(ucm_block_ids) end = 0 diff --git a/ucm/store/nfsstore/nfsstore_connector.py b/ucm/store/nfsstore/nfsstore_connector.py index 4a348a05..914a9671 100644 --- a/ucm/store/nfsstore/nfsstore_connector.py +++ b/ucm/store/nfsstore/nfsstore_connector.py @@ -41,7 +41,7 @@ def __init__(self, config: Dict): super().__init__(config) self.store = ucmnfsstore.NFSStore() storage_backends = [ - path for path in config["storage_backends"].split(":") if path + path for path in config["storageBackends"].split(":") if path ] block_size = int(config["kv_block_size"]) transfer_enable = True if config["role"] == "worker" else False diff --git a/ucm/store/pcstore/pcstore_connector.py b/ucm/store/pcstore/pcstore_connector.py index e8486c3d..56b9bfe4 100644 --- a/ucm/store/pcstore/pcstore_connector.py +++ b/ucm/store/pcstore/pcstore_connector.py @@ -41,7 +41,7 @@ def __init__(self, config: Dict): super().__init__(config) self.store = ucmpcstore.PcStore() storage_backends = [ - path for path in config["storage_backends"].split(":") if path + path for path in config["storageBackends"].split(":") if path ] block_size = int(config["kv_block_size"]) transfer_enable = True if config["role"] == "worker" else False From 1e1cfc60d2bfd6fb8517ab6d3f4d51561d7f58ae Mon Sep 17 00:00:00 2001 From: qyh111 Date: Tue, 25 Nov 2025 19:42:07 -0800 Subject: [PATCH 5/7] fix style --- 
From 97d93de7fc3f6a7c555cc3fd3e434bafc46c920f Mon Sep 17 00:00:00 2001
From: qyh111
Date: Wed, 26 Nov 2025 00:02:02 -0800
Subject: [PATCH 6/7] code style and modify hash

---
 examples/ucm_config_example.yaml         |  4 ++--
 ucm/integration/vllm/ucm_connector.py    | 28 ++++++++++--------------
 ucm/store/nfsstore/nfsstore_connector.py |  4 ++--
 ucm/store/pcstore/pcstore_connector.py   |  2 +-
 4 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/examples/ucm_config_example.yaml b/examples/ucm_config_example.yaml
index 255e1bd0..b7207942 100644
--- a/examples/ucm_config_example.yaml
+++ b/examples/ucm_config_example.yaml
@@ -11,8 +11,8 @@
 ucm_connectors:
   - ucm_connector_name: "UcmNfsStore"
     ucm_connector_config:
-      storageBackends: "/mnt/test"
-      transferIoDirect: false
+      storage_backends: "/mnt/test"
+      use_direct: false

 load_only_first_rank: false
diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py
index 4004de77..8e8bddac 100644
--- a/ucm/integration/vllm/ucm_connector.py
+++ b/ucm/integration/vllm/ucm_connector.py
@@ -56,27 +56,20 @@ class RequestHasher:

     _SEED_HASH = None

-    def __init__(self, vllm_config: "VllmConfig", rank_id: int):
-        self.model = vllm_config.model_config.model
-        self.world_size = vllm_config.parallel_config.world_size
-        self.dtype = str(vllm_config.model_config.dtype)
-        self.rank = rank_id
-
-        meta = f"{self.model}:{self.world_size}:{self.dtype}:{self.rank}"
-        meta_bytes = meta.encode("utf-8")
-
-        self._prefix_md5 = hashlib.md5()
-        self._prefix_md5.update(meta_bytes)
+    def __init__(self, vllm_config, rank_id):
+        meta = f"{vllm_config.model_config.model}:{vllm_config.parallel_config.world_size}:{vllm_config.model_config.dtype}:{rank_id}"
+        self.meta_bytes = meta.encode("utf-8")

         if RequestHasher._SEED_HASH is None:
             RequestHasher._SEED_HASH = self("UCM_HASH_SEED")

     def __call__(self, input_data) -> int:
-        input_bytes = pickle.dumps(input_data, protocol=pickle.HIGHEST_PROTOCOL)
-
-        h = self._prefix_md5.copy()
-        h.update(input_bytes)
+        if isinstance(input_data, str):
+            input_bytes = input_data.encode("utf-8")
+        else:
+            input_bytes = pickle.dumps(input_data, protocol=pickle.HIGHEST_PROTOCOL)

+        h = hashlib.md5(self.meta_bytes + input_bytes)
         return int.from_bytes(h.digest(), byteorder="big")


 class UCMDirectConnector(KVConnectorBase_V1):
@@ -106,7 +99,10 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):

         self.store: UcmKVStoreBase

-        self.request_hasher = RequestHasher(vllm_config, max(0, self.rank))
+        if role == KVConnectorRole.SCHEDULER:
+            self.request_hasher = RequestHasher(vllm_config, 0)
+        else:
+            self.request_hasher = RequestHasher(vllm_config, self.rank)

         # save block info, avoid hash request twice, and track them until request finished
         self.requests_meta: dict[str, RequestMeta] = {}
diff --git a/ucm/store/nfsstore/nfsstore_connector.py b/ucm/store/nfsstore/nfsstore_connector.py
index 914a9671..bd30f628 100644
--- a/ucm/store/nfsstore/nfsstore_connector.py
+++ b/ucm/store/nfsstore/nfsstore_connector.py
@@ -41,7 +41,7 @@ def __init__(self, config: Dict):
         super().__init__(config)
         self.store = ucmnfsstore.NFSStore()
         storage_backends = [
-            path for path in config["storageBackends"].split(":") if path
+            path for path in config["storage_backends"].split(":") if path
         ]
         block_size = int(config["kv_block_size"])
         transfer_enable = True if config["role"] == "worker" else False
@@ -51,7 +51,7 @@ def __init__(self, config: Dict):
         if transfer_enable:
             param.transferDeviceId = config["device"]
             param.transferIoSize = config["io_size"]
-            param.transferIoDirect = config.get("transferIoDirect", False)
+            param.transferIoDirect = config.get("use_direct", False)

         # NOTE: compatible with legacy nfsstore lib
         if hasattr(param, "storageCapacity"):
diff --git a/ucm/store/pcstore/pcstore_connector.py b/ucm/store/pcstore/pcstore_connector.py
index 56b9bfe4..e8486c3d 100644
--- a/ucm/store/pcstore/pcstore_connector.py
+++ b/ucm/store/pcstore/pcstore_connector.py
@@ -41,7 +41,7 @@ def __init__(self, config: Dict):
         super().__init__(config)
         self.store = ucmpcstore.PcStore()
         storage_backends = [
-            path for path in config["storageBackends"].split(":") if path
+            path for path in config["storage_backends"].split(":") if path
        ]
         block_size = int(config["kv_block_size"])
         transfer_enable = True if config["role"] == "worker" else False
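PATCH 6 collapses the hasher to a single md5 over meta_bytes + input_bytes and special-cases strings, so re-hashed block IDs (which are decimal strings) hash identically across processes without a pickle round-trip, and the scheduler always salts with rank 0 so its IDs line up with worker rank 0's. A condensed sketch of the final scheme, with hypothetical metadata values:

    import hashlib
    import pickle

    meta_bytes = "llama-3-8b:4:torch.bfloat16:0".encode("utf-8")  # model:world_size:dtype:rank

    def ucm_hash(input_data) -> int:
        if isinstance(input_data, str):
            input_bytes = input_data.encode("utf-8")
        else:
            input_bytes = pickle.dumps(input_data, protocol=pickle.HIGHEST_PROTOCOL)
        return int.from_bytes(hashlib.md5(meta_bytes + input_bytes).digest(), "big")

    seed = ucm_hash("UCM_HASH_SEED")
    first_block_id = str(ucm_hash((seed, (1, 2, 3, 4))))  # what the scheduler stores
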
From ed54c5ca21e188efd39567e3f78a7bd645b65529 Mon Sep 17 00:00:00 2001
From: qyh111
Date: Wed, 26 Nov 2025 01:07:16 -0800
Subject: [PATCH 7/7] init parent_block_hash_value

---
 ucm/integration/vllm/ucm_connector.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py
index 8e8bddac..8bec088f 100644
--- a/ucm/integration/vllm/ucm_connector.py
+++ b/ucm/integration/vllm/ucm_connector.py
@@ -164,17 +164,14 @@ def generate_hash(self, block_size: int, request: "Request") -> list[str]:
         token_ids = request.all_token_ids

         ret = []
-        parent_block_hash_value = None
+        parent_block_hash_value = RequestHasher._SEED_HASH
         for start in range(0, len(token_ids), block_size):
             end = start + block_size
             block_token_ids = token_ids[start:end]
             # Do not hash the block if it is not full.
             if len(block_token_ids) < block_size:
                 break

-            if not parent_block_hash_value:
-                parent_block_hash_value = RequestHasher._SEED_HASH
-
             block_token_ids_tuple = tuple(block_token_ids)
             hash_value = self.request_hasher(
                 (parent_block_hash_value, block_token_ids_tuple)
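A side note on PATCH 7: seeding parent_block_hash_value with RequestHasher._SEED_HASH up front is equivalent to the old lazy initialization, except in the astronomically unlikely case where a block hash evaluates to 0; the old "if not parent_block_hash_value" check would have silently re-seeded the chain at that point, while the new form chains through it correctly.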