From 95a7340382876469c60888393023cc43c7b6ed1f Mon Sep 17 00:00:00 2001
From: sangchengmeng <sangchengmeng@mail.ustc.edu.cn>
Date: Tue, 13 May 2025 15:22:48 +0800
Subject: [PATCH 1/4] add debug logging for embed cache release path

---
 lightllm/server/core/objs/req.py                       | 2 ++
 lightllm/server/embed_cache/impl/naive_memory_cache.py | 5 +++++
 lightllm/server/httpserver/manager.py                  | 3 ++-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/lightllm/server/core/objs/req.py b/lightllm/server/core/objs/req.py
index c8d8476e51..294fd72c5b 100644
--- a/lightllm/server/core/objs/req.py
+++ b/lightllm/server/core/objs/req.py
@@ -200,9 +200,11 @@ def can_release(self):
         can_released_mark = self.can_released_mark
 
         if self.is_aborted and can_released_mark and ref_count_ok:
+            print("because of aborted, can release")
             return True
 
         if self.finish_status.is_finished() and can_released_mark and ref_count_ok and self.out_tokens_queue.is_empty():
+            print("because of finished, can release")
             return True
 
         return False
diff --git a/lightllm/server/embed_cache/impl/naive_memory_cache.py b/lightllm/server/embed_cache/impl/naive_memory_cache.py
index c03b084c47..36007cbada 100644
--- a/lightllm/server/embed_cache/impl/naive_memory_cache.py
+++ b/lightllm/server/embed_cache/impl/naive_memory_cache.py
@@ -78,6 +78,10 @@ def _clear(self):
         t = time.time()
         for id, record in items:
             if record.ref <= 0 or t - record.visittime >= self.expired_secs:
+                if record.ref <= 0:
+                    logger.info(f"id {id}'s record ref is 0")
+                if t - record.visittime >= self.expired_secs:
+                    logger.info(f"id {id}'s record expired, because of time_expired")
                 if record.data:
                     free_shm(get_shm_name_data(id))
                 if record.embed:
@@ -129,6 +133,7 @@ def alloc(self, md5sum: str, token_num: int) -> dict:
             return {"id": record.id, "token_id": record.token_id, "token_num": record.token_num}
 
     def release(self, id: int) -> None:
+        logger.info(f"Releasing id {id}")
         with self.lock:
             self._records[id].ref -= 1
 
diff --git a/lightllm/server/httpserver/manager.py b/lightllm/server/httpserver/manager.py
index 5ef4112473..5076cdd7e6 100644
--- a/lightllm/server/httpserver/manager.py
+++ b/lightllm/server/httpserver/manager.py
@@ -161,6 +161,7 @@ async def _release_multimodal_resources(self, multimodal_params: MultimodalParam
             if multimodal_params is not None:
                 for img in multimodal_params.images:
                     if img.uuid is not None:
+                        logger.info(f"Releasing id {img.uuid}")
                         self.cache_client.root.release(img.uuid)
                         # 将 uuid 等 赋值为 None, 防止因为abort等异常情况造成重复释放异常
                         img.uuid = None
@@ -593,8 +594,8 @@ async def recycle_resource_loop(self):
             release_req_status: List[ReqStatus] = []
             for req_status in self.req_id_to_out_inf.values():
                 if req_status.can_release():
+                    logger.info(f"req_status {req_status.group_req_objs.group_req_id} can release")
                     release_req_status.append(req_status)
-
             for req_status in release_req_status:
                 self.req_id_to_out_inf.pop(req_status.group_req_objs.group_req_id, None)
                 for req in req_status.group_req_objs.shm_req_objs:

From 7e8f6d7ed80a23df94b4bc2c4c15618330330950 Mon Sep 17 00:00:00 2001
From: sangchengmeng <sangchengmeng@mail.ustc.edu.cn>
Date: Thu, 15 May 2025 21:13:21 +0800
Subject: [PATCH 2/4] [FIX] serialize multimodal alloc to fix shm deadlock

---
 lightllm/server/httpserver/manager.py | 28 ++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/lightllm/server/httpserver/manager.py b/lightllm/server/httpserver/manager.py
index 5076cdd7e6..03b755dc19 100644
--- a/lightllm/server/httpserver/manager.py
+++ b/lightllm/server/httpserver/manager.py
@@ -52,6 +52,7 @@ def __init__(
 
         self.multinode_req_manager = None
         self.nnodes = args.nnodes
+        self.lock = asyncio.Lock()
         self.node_rank = args.node_rank
         self.transfer_lock = asyncio.Lock()  # the lock for transfer to next module in multi node mode.
         self.disable_abort = args.nnodes > 1 and args.dp == 1  # mulitnode dp=1 mode, disable abort
@@ -141,19 +142,20 @@ async def _alloc_resource(self, item: Union[ImageItem, AudioItem]):
     async def _alloc_multimodal_resources(self, multimodal_params: MultimodalParams, sampling_params: SamplingParams):
         # 只有 P 和 NORMAL 节点需要真的管理多模态资源
         if self.pd_mode.is_P_or_NORMAL():
-            for img in multimodal_params.images:
-                self.tokenizer.init_imageitem_extral_params(img, multimodal_params, sampling_params)
-                record = await self._alloc_resource(img)
-                img.uuid = record["id"]
-                img.token_id = record["token_id"]
-                img.token_num = record["token_num"]
-            for audio in multimodal_params.audios:
-                self.tokenizer.init_audioitem_extral_params(audio, multimodal_params, sampling_params)
-                record = await self._alloc_resource(audio)
-                audio.uuid = record["id"]
-                audio.token_id = record["token_id"]
-                audio.token_num = record["token_num"]
-        return
+            async with self.lock:
+                for img in multimodal_params.images:
+                    self.tokenizer.init_imageitem_extral_params(img, multimodal_params, sampling_params)
+                    record = await self._alloc_resource(img)
+                    img.uuid = record["id"]
+                    img.token_id = record["token_id"]
+                    img.token_num = record["token_num"]
+                for audio in multimodal_params.audios:
+                    self.tokenizer.init_audioitem_extral_params(audio, multimodal_params, sampling_params)
+                    record = await self._alloc_resource(audio)
+                    audio.uuid = record["id"]
+                    audio.token_id = record["token_id"]
+                    audio.token_num = record["token_num"]
+            return
 
     async def _release_multimodal_resources(self, multimodal_params: MultimodalParams):
         # 只有 P 和 NORMAL 节点需要真的管理多模态资源

From 2862e1633525d0c45ab93be60ebb4a66442ad065 Mon Sep 17 00:00:00 2001
From: sangchengmeng <sangchengmeng@mail.ustc.edu.cn>
Date: Fri, 16 May 2025 17:09:19 +0800
Subject: [PATCH 3/4] [FIX] rename lock to _resource_lock, document deadlock

---
 lightllm/server/httpserver/manager.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/lightllm/server/httpserver/manager.py b/lightllm/server/httpserver/manager.py
index 03b755dc19..2dc753e2e1 100644
--- a/lightllm/server/httpserver/manager.py
+++ b/lightllm/server/httpserver/manager.py
@@ -52,7 +52,7 @@ def __init__(
 
         self.multinode_req_manager = None
         self.nnodes = args.nnodes
-        self.lock = asyncio.Lock()
+        self._resource_lock = asyncio.Lock()
         self.node_rank = args.node_rank
         self.transfer_lock = asyncio.Lock()  # the lock for transfer to next module in multi node mode.
         self.disable_abort = args.nnodes > 1 and args.dp == 1  # mulitnode dp=1 mode, disable abort
@@ -142,7 +142,13 @@ async def _alloc_resource(self, item: Union[ImageItem, AudioItem]):
     async def _alloc_multimodal_resources(self, multimodal_params: MultimodalParams, sampling_params: SamplingParams):
         # 只有 P 和 NORMAL 节点需要真的管理多模态资源
         if self.pd_mode.is_P_or_NORMAL():
-            async with self.lock:
+            # Acquire the lock so that two concurrent requests cannot both
+            # allocate more records than the cache_capacity.
+            # For example, if cache_capacity is 10 and each request has 6 images,
+            # without the lock one request might allocate 5 images,
+            # then another request allocates 5 more images, filling cache_capacity,
+            # and both wait for space to free, causing a deadlock.
+            async with self._resource_lock:
                 for img in multimodal_params.images:
                     self.tokenizer.init_imageitem_extral_params(img, multimodal_params, sampling_params)
                     record = await self._alloc_resource(img)

From bd2df3ddb4aea8ae6a0cd1ff1f0feb7a9f5e4401 Mon Sep 17 00:00:00 2001
From: sangchengmeng <sangchengmeng@mail.ustc.edu.cn>
Date: Fri, 16 May 2025 17:16:12 +0800
Subject: [PATCH 4/4] remove debug logging, reword resource lock comment

---
 lightllm/server/core/objs/req.py                      |  2 --
 .../server/embed_cache/impl/naive_memory_cache.py     |  5 -----
 lightllm/server/httpserver/manager.py                 | 11 +++--------
 3 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/lightllm/server/core/objs/req.py b/lightllm/server/core/objs/req.py
index 294fd72c5b..c8d8476e51 100644
--- a/lightllm/server/core/objs/req.py
+++ b/lightllm/server/core/objs/req.py
@@ -200,11 +200,9 @@ def can_release(self):
         can_released_mark = self.can_released_mark
 
         if self.is_aborted and can_released_mark and ref_count_ok:
-            print("because of aborted, can release")
             return True
 
         if self.finish_status.is_finished() and can_released_mark and ref_count_ok and self.out_tokens_queue.is_empty():
-            print("because of finished, can release")
             return True
 
         return False
diff --git a/lightllm/server/embed_cache/impl/naive_memory_cache.py b/lightllm/server/embed_cache/impl/naive_memory_cache.py
index 36007cbada..c03b084c47 100644
--- a/lightllm/server/embed_cache/impl/naive_memory_cache.py
+++ b/lightllm/server/embed_cache/impl/naive_memory_cache.py
@@ -78,10 +78,6 @@ def _clear(self):
         t = time.time()
         for id, record in items:
             if record.ref <= 0 or t - record.visittime >= self.expired_secs:
-                if record.ref <= 0:
-                    logger.info(f"id {id}'s record ref is 0")
-                if t - record.visittime >= self.expired_secs:
-                    logger.info(f"id {id}'s record expired, because of time_expired")
                 if record.data:
                     free_shm(get_shm_name_data(id))
                 if record.embed:
@@ -133,7 +129,6 @@ def alloc(self, md5sum: str, token_num: int) -> dict:
             return {"id": record.id, "token_id": record.token_id, "token_num": record.token_num}
 
     def release(self, id: int) -> None:
-        logger.info(f"Releasing id {id}")
         with self.lock:
             self._records[id].ref -= 1
 
diff --git a/lightllm/server/httpserver/manager.py b/lightllm/server/httpserver/manager.py
index 2dc753e2e1..e02eaaf796 100644
--- a/lightllm/server/httpserver/manager.py
+++ b/lightllm/server/httpserver/manager.py
@@ -142,12 +142,9 @@ async def _alloc_resource(self, item: Union[ImageItem, AudioItem]):
     async def _alloc_multimodal_resources(self, multimodal_params: MultimodalParams, sampling_params: SamplingParams):
         # 只有 P 和 NORMAL 节点需要真的管理多模态资源
         if self.pd_mode.is_P_or_NORMAL():
-            # Acquire the lock so that two concurrent requests cannot both
-            # allocate more records than the cache_capacity.
-            # For example, if cache_capacity is 10 and each request has 6 images,
-            # without the lock one request might allocate 5 images,
-            # then another request allocates 5 more images, filling cache_capacity,
-            # and both wait for space to free, causing a deadlock.
+            # 这里的锁是为了 防止多个含有多张图片的请求 同时申请的record数量 大于cache_capacity，从而造成死锁的问题。
+            # 如果不加任何锁，假如请求1和请求2都有6张图片，而cache_capacity为10，
+            # 那么如果某一时刻shm中存在请求1的5张图和请求2的5张图，将会资源竞争产生死锁。
             async with self._resource_lock:
                 for img in multimodal_params.images:
                     self.tokenizer.init_imageitem_extral_params(img, multimodal_params, sampling_params)
@@ -169,7 +166,6 @@ async def _release_multimodal_resources(self, multimodal_params: MultimodalParam
             if multimodal_params is not None:
                 for img in multimodal_params.images:
                     if img.uuid is not None:
-                        logger.info(f"Releasing id {img.uuid}")
                         self.cache_client.root.release(img.uuid)
                         # 将 uuid 等 赋值为 None, 防止因为abort等异常情况造成重复释放异常
                         img.uuid = None
@@ -602,7 +598,6 @@ async def recycle_resource_loop(self):
             release_req_status: List[ReqStatus] = []
             for req_status in self.req_id_to_out_inf.values():
                 if req_status.can_release():
-                    logger.info(f"req_status {req_status.group_req_objs.group_req_id} can release")
                     release_req_status.append(req_status)
             for req_status in release_req_status:
                 self.req_id_to_out_inf.pop(req_status.group_req_objs.group_req_id, None)